| Line Number |
../DebugInfoTest/example_mips_dbg.ll
BUT NOT
../DebugInfoTest/example_mips.ll
|
Line Number |
../DebugInfoTest/example_mips.ll
BUT NOT
../DebugInfoTest/example_mips_dbg.ll
|
| 1 |
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===// |
1 |
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===// |
| 2 |
// |
2 |
// |
| 3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
3 |
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 |
// See https://llvm.org/LICENSE.txt for license information. |
4 |
// See https://llvm.org/LICENSE.txt for license information. |
| 5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
5 |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 |
// |
6 |
// |
| 7 |
//===----------------------------------------------------------------------===// |
7 |
//===----------------------------------------------------------------------===// |
| 8 |
// |
8 |
// |
| 9 |
// This transformation analyzes and transforms the induction variables (and |
9 |
// This transformation analyzes and transforms the induction variables (and |
| 10 |
// computations derived from them) into forms suitable for efficient execution |
10 |
// computations derived from them) into forms suitable for efficient execution |
| 11 |
// on the target. |
11 |
// on the target. |
| 12 |
// |
12 |
// |
| 13 |
// This pass performs a strength reduction on array references inside loops that |
13 |
// This pass performs a strength reduction on array references inside loops that |
| 14 |
// have as one or more of their components the loop induction variable, it |
14 |
// have as one or more of their components the loop induction variable, it |
| 15 |
// rewrites expressions to take advantage of scaled-index addressing modes |
15 |
// rewrites expressions to take advantage of scaled-index addressing modes |
| 16 |
// available on the target, and it performs a variety of other optimizations |
16 |
// available on the target, and it performs a variety of other optimizations |
| 17 |
// related to loop induction variables. |
17 |
// related to loop induction variables. |
| 18 |
// |
18 |
// |
| 19 |
// Terminology note: this code has a lot of handling for "post-increment" or |
19 |
// Terminology note: this code has a lot of handling for "post-increment" or |
| 20 |
// "post-inc" users. This is not talking about post-increment addressing modes; |
20 |
// "post-inc" users. This is not talking about post-increment addressing modes; |
| 21 |
// it is instead talking about code like this: |
21 |
// it is instead talking about code like this: |
| 22 |
// |
22 |
// |
| 23 |
// %i = phi [ 0, %entry ], [ %i.next, %latch ] |
23 |
// %i = phi [ 0, %entry ], [ %i.next, %latch ] |
| 24 |
// ... |
24 |
// ... |
| 25 |
// %i.next = add %i, 1 |
25 |
// %i.next = add %i, 1 |
| 26 |
// %c = icmp eq %i.next, %n |
26 |
// %c = icmp eq %i.next, %n |
| 27 |
// |
27 |
// |
| 28 |
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however |
28 |
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however |
| 29 |
// it's useful to think about these as the same register, with some uses using |
29 |
// it's useful to think about these as the same register, with some uses using |
| 30 |
// the value of the register before the add and some using it after. In this |
30 |
// the value of the register before the add and some using it after. In this |
| 31 |
// example, the icmp is a post-increment user, since it uses %i.next, which is |
31 |
// example, the icmp is a post-increment user, since it uses %i.next, which is |
| 32 |
// the value of the induction variable after the increment. The other common |
32 |
// the value of the induction variable after the increment. The other common |
| 33 |
// case of post-increment users is users outside the loop. |
33 |
// case of post-increment users is users outside the loop. |
| 34 |
// |
34 |
// |
| 35 |
// TODO: More sophistication in the way Formulae are generated and filtered. |
35 |
// TODO: More sophistication in the way Formulae are generated and filtered. |
| 36 |
// |
36 |
// |
| 37 |
// TODO: Handle multiple loops at a time. |
37 |
// TODO: Handle multiple loops at a time. |
| 38 |
// |
38 |
// |
| 39 |
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead |
39 |
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead |
| 40 |
// of a GlobalValue? |
40 |
// of a GlobalValue? |
| 41 |
// |
41 |
// |
| 42 |
// TODO: When truncation is free, truncate ICmp users' operands to make it a |
42 |
// TODO: When truncation is free, truncate ICmp users' operands to make it a |
| 43 |
// smaller encoding (on x86 at least). |
43 |
// smaller encoding (on x86 at least). |
| 44 |
// |
44 |
// |
| 45 |
// TODO: When a negated register is used by an add (such as in a list of |
45 |
// TODO: When a negated register is used by an add (such as in a list of |
| 46 |
// multiple base registers, or as the increment expression in an addrec), |
46 |
// multiple base registers, or as the increment expression in an addrec), |
| 47 |
// we may not actually need both reg and (-1 * reg) in registers; the |
47 |
// we may not actually need both reg and (-1 * reg) in registers; the |
| 48 |
// negation can be implemented by using a sub instead of an add. The |
48 |
// negation can be implemented by using a sub instead of an add. The |
| 49 |
// lack of support for taking this into consideration when making |
49 |
// lack of support for taking this into consideration when making |
| 50 |
// register pressure decisions is partly worked around by the "Special" |
50 |
// register pressure decisions is partly worked around by the "Special" |
| 51 |
// use kind. |
51 |
// use kind. |
| 52 |
// |
52 |
// |
| 53 |
//===----------------------------------------------------------------------===// |
53 |
//===----------------------------------------------------------------------===// |
| 54 |
|
54 |
|
| 55 |
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" |
55 |
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" |
| 56 |
#include "llvm/ADT/APInt.h" |
56 |
#include "llvm/ADT/APInt.h" |
| 57 |
#include "llvm/ADT/DenseMap.h" |
57 |
#include "llvm/ADT/DenseMap.h" |
| 58 |
#include "llvm/ADT/DenseSet.h" |
58 |
#include "llvm/ADT/DenseSet.h" |
| 59 |
#include "llvm/ADT/Hashing.h" |
59 |
#include "llvm/ADT/Hashing.h" |
| 60 |
#include "llvm/ADT/PointerIntPair.h" |
60 |
#include "llvm/ADT/PointerIntPair.h" |
| 61 |
#include "llvm/ADT/STLExtras.h" |
61 |
#include "llvm/ADT/STLExtras.h" |
| 62 |
#include "llvm/ADT/SetVector.h" |
62 |
#include "llvm/ADT/SetVector.h" |
| 63 |
#include "llvm/ADT/SmallBitVector.h" |
63 |
#include "llvm/ADT/SmallBitVector.h" |
| 64 |
#include "llvm/ADT/SmallPtrSet.h" |
64 |
#include "llvm/ADT/SmallPtrSet.h" |
| 65 |
#include "llvm/ADT/SmallSet.h" |
65 |
#include "llvm/ADT/SmallSet.h" |
| 66 |
#include "llvm/ADT/SmallVector.h" |
66 |
#include "llvm/ADT/SmallVector.h" |
| 67 |
#include "llvm/ADT/Statistic.h" |
67 |
#include "llvm/ADT/Statistic.h" |
| 68 |
#include "llvm/ADT/iterator_range.h" |
68 |
#include "llvm/ADT/iterator_range.h" |
| 69 |
#include "llvm/Analysis/AssumptionCache.h" |
69 |
#include "llvm/Analysis/AssumptionCache.h" |
| 70 |
#include "llvm/Analysis/IVUsers.h" |
70 |
#include "llvm/Analysis/IVUsers.h" |
| 71 |
#include "llvm/Analysis/LoopAnalysisManager.h" |
71 |
#include "llvm/Analysis/LoopAnalysisManager.h" |
| 72 |
#include "llvm/Analysis/LoopInfo.h" |
72 |
#include "llvm/Analysis/LoopInfo.h" |
| 73 |
#include "llvm/Analysis/LoopPass.h" |
73 |
#include "llvm/Analysis/LoopPass.h" |
| 74 |
#include "llvm/Analysis/MemorySSA.h" |
74 |
#include "llvm/Analysis/MemorySSA.h" |
| 75 |
#include "llvm/Analysis/MemorySSAUpdater.h" |
75 |
#include "llvm/Analysis/MemorySSAUpdater.h" |
| 76 |
#include "llvm/Analysis/ScalarEvolution.h" |
76 |
#include "llvm/Analysis/ScalarEvolution.h" |
| 77 |
#include "llvm/Analysis/ScalarEvolutionExpressions.h" |
77 |
#include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| 78 |
#include "llvm/Analysis/ScalarEvolutionNormalization.h" |
78 |
#include "llvm/Analysis/ScalarEvolutionNormalization.h" |
| 79 |
#include "llvm/Analysis/TargetLibraryInfo.h" |
79 |
#include "llvm/Analysis/TargetLibraryInfo.h" |
| 80 |
#include "llvm/Analysis/TargetTransformInfo.h" |
80 |
#include "llvm/Analysis/TargetTransformInfo.h" |
| 81 |
#include "llvm/Analysis/ValueTracking.h" |
81 |
#include "llvm/Analysis/ValueTracking.h" |
| 82 |
#include "llvm/BinaryFormat/Dwarf.h" |
82 |
#include "llvm/BinaryFormat/Dwarf.h" |
| 83 |
#include "llvm/Config/llvm-config.h" |
83 |
#include "llvm/Config/llvm-config.h" |
| 84 |
#include "llvm/IR/BasicBlock.h" |
84 |
#include "llvm/IR/BasicBlock.h" |
| 85 |
#include "llvm/IR/Constant.h" |
85 |
#include "llvm/IR/Constant.h" |
| 86 |
#include "llvm/IR/Constants.h" |
86 |
#include "llvm/IR/Constants.h" |
| 87 |
#include "llvm/IR/DebugInfoMetadata.h" |
87 |
#include "llvm/IR/DebugInfoMetadata.h" |
| 88 |
#include "llvm/IR/DerivedTypes.h" |
88 |
#include "llvm/IR/DerivedTypes.h" |
| 89 |
#include "llvm/IR/Dominators.h" |
89 |
#include "llvm/IR/Dominators.h" |
| 90 |
#include "llvm/IR/GlobalValue.h" |
90 |
#include "llvm/IR/GlobalValue.h" |
| 91 |
#include "llvm/IR/IRBuilder.h" |
91 |
#include "llvm/IR/IRBuilder.h" |
| 92 |
#include "llvm/IR/InstrTypes.h" |
92 |
#include "llvm/IR/InstrTypes.h" |
| 93 |
#include "llvm/IR/Instruction.h" |
93 |
#include "llvm/IR/Instruction.h" |
| 94 |
#include "llvm/IR/Instructions.h" |
94 |
#include "llvm/IR/Instructions.h" |
| 95 |
#include "llvm/IR/IntrinsicInst.h" |
95 |
#include "llvm/IR/IntrinsicInst.h" |
| 96 |
#include "llvm/IR/Module.h" |
96 |
#include "llvm/IR/Module.h" |
| 97 |
#include "llvm/IR/Operator.h" |
97 |
#include "llvm/IR/Operator.h" |
| 98 |
#include "llvm/IR/PassManager.h" |
98 |
#include "llvm/IR/PassManager.h" |
| 99 |
#include "llvm/IR/Type.h" |
99 |
#include "llvm/IR/Type.h" |
| 100 |
#include "llvm/IR/Use.h" |
100 |
#include "llvm/IR/Use.h" |
| 101 |
#include "llvm/IR/User.h" |
101 |
#include "llvm/IR/User.h" |
| 102 |
#include "llvm/IR/Value.h" |
102 |
#include "llvm/IR/Value.h" |
| 103 |
#include "llvm/IR/ValueHandle.h" |
103 |
#include "llvm/IR/ValueHandle.h" |
| 104 |
#include "llvm/InitializePasses.h" |
104 |
#include "llvm/InitializePasses.h" |
| 105 |
#include "llvm/Pass.h" |
105 |
#include "llvm/Pass.h" |
| 106 |
#include "llvm/Support/Casting.h" |
106 |
#include "llvm/Support/Casting.h" |
| 107 |
#include "llvm/Support/CommandLine.h" |
107 |
#include "llvm/Support/CommandLine.h" |
| 108 |
#include "llvm/Support/Compiler.h" |
108 |
#include "llvm/Support/Compiler.h" |
| 109 |
#include "llvm/Support/Debug.h" |
109 |
#include "llvm/Support/Debug.h" |
| 110 |
#include "llvm/Support/ErrorHandling.h" |
110 |
#include "llvm/Support/ErrorHandling.h" |
| 111 |
#include "llvm/Support/MathExtras.h" |
111 |
#include "llvm/Support/MathExtras.h" |
| 112 |
#include "llvm/Support/raw_ostream.h" |
112 |
#include "llvm/Support/raw_ostream.h" |
| 113 |
#include "llvm/Transforms/Scalar.h" |
113 |
#include "llvm/Transforms/Scalar.h" |
| 114 |
#include "llvm/Transforms/Utils.h" |
114 |
#include "llvm/Transforms/Utils.h" |
| 115 |
#include "llvm/Transforms/Utils/BasicBlockUtils.h" |
115 |
#include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 116 |
#include "llvm/Transforms/Utils/Local.h" |
116 |
#include "llvm/Transforms/Utils/Local.h" |
| 117 |
#include "llvm/Transforms/Utils/LoopUtils.h" |
117 |
#include "llvm/Transforms/Utils/LoopUtils.h" |
| 118 |
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
118 |
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
| 119 |
#include |
119 |
#include |
| 120 |
#include |
120 |
#include |
| 121 |
#include |
121 |
#include |
| 122 |
#include |
122 |
#include |
| 123 |
#include |
123 |
#include |
| 124 |
#include |
124 |
#include |
| 125 |
#include |
125 |
#include |
| 126 |
#include |
126 |
#include |
| 127 |
#include |
127 |
#include |
| 128 |
#include |
128 |
#include |
| 129 |
|
129 |
|
| 130 |
using namespace llvm; |
130 |
using namespace llvm; |
| 131 |
|
131 |
|
| 132 |
#define DEBUG_TYPE "loop-reduce" |
132 |
#define DEBUG_TYPE "loop-reduce" |
| 133 |
|
133 |
|
| 134 |
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for |
134 |
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for |
| 135 |
/// bail out. This threshold is far beyond the number of users that LSR can |
135 |
/// bail out. This threshold is far beyond the number of users that LSR can |
| 136 |
/// conceivably solve, so it should not affect generated code, but catches the |
136 |
/// conceivably solve, so it should not affect generated code, but catches the |
| 137 |
/// worst cases before LSR burns too much compile time and stack space. |
137 |
/// worst cases before LSR burns too much compile time and stack space. |
| 138 |
static const unsigned MaxIVUsers = 200; |
138 |
static const unsigned MaxIVUsers = 200; |
| 139 |
|
139 |
|
| 140 |
/// Limit the size of expression that SCEV-based salvaging will attempt to |
140 |
/// Limit the size of expression that SCEV-based salvaging will attempt to |
| 141 |
/// translate into a DIExpression. |
141 |
/// translate into a DIExpression. |
| 142 |
/// Choose a maximum size such that debuginfo is not excessively increased and |
142 |
/// Choose a maximum size such that debuginfo is not excessively increased and |
| 143 |
/// the salvaging is not too expensive for the compiler. |
143 |
/// the salvaging is not too expensive for the compiler. |
| 144 |
static const unsigned MaxSCEVSalvageExpressionSize = 64; |
144 |
static const unsigned MaxSCEVSalvageExpressionSize = 64; |
| 145 |
|
145 |
|
| 146 |
// Cleanup congruent phis after LSR phi expansion. |
146 |
// Cleanup congruent phis after LSR phi expansion. |
| 147 |
static cl::opt EnablePhiElim( |
147 |
static cl::opt EnablePhiElim( |
| 148 |
"enable-lsr-phielim", cl::Hidden, cl::init(true), |
148 |
"enable-lsr-phielim", cl::Hidden, cl::init(true), |
| 149 |
cl::desc("Enable LSR phi elimination")); |
149 |
cl::desc("Enable LSR phi elimination")); |
| 150 |
|
150 |
|
| 151 |
// The flag adds instruction count to solutions cost comparison. |
151 |
// The flag adds instruction count to solutions cost comparison. |
| 152 |
static cl::opt InsnsCost( |
152 |
static cl::opt InsnsCost( |
| 153 |
"lsr-insns-cost", cl::Hidden, cl::init(true), |
153 |
"lsr-insns-cost", cl::Hidden, cl::init(true), |
| 154 |
cl::desc("Add instruction count to a LSR cost model")); |
154 |
cl::desc("Add instruction count to a LSR cost model")); |
| 155 |
|
155 |
|
| 156 |
// Flag to choose how to narrow complex lsr solution |
156 |
// Flag to choose how to narrow complex lsr solution |
| 157 |
static cl::opt LSRExpNarrow( |
157 |
static cl::opt LSRExpNarrow( |
| 158 |
"lsr-exp-narrow", cl::Hidden, cl::init(false), |
158 |
"lsr-exp-narrow", cl::Hidden, cl::init(false), |
| 159 |
cl::desc("Narrow LSR complex solution using" |
159 |
cl::desc("Narrow LSR complex solution using" |
| 160 |
" expectation of registers number")); |
160 |
" expectation of registers number")); |
| 161 |
|
161 |
|
| 162 |
// Flag to narrow search space by filtering non-optimal formulae with |
162 |
// Flag to narrow search space by filtering non-optimal formulae with |
| 163 |
// the same ScaledReg and Scale. |
163 |
// the same ScaledReg and Scale. |
| 164 |
static cl::opt FilterSameScaledReg( |
164 |
static cl::opt FilterSameScaledReg( |
| 165 |
"lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), |
165 |
"lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), |
| 166 |
cl::desc("Narrow LSR search space by filtering non-optimal formulae" |
166 |
cl::desc("Narrow LSR search space by filtering non-optimal formulae" |
| 167 |
" with the same ScaledReg and Scale")); |
167 |
" with the same ScaledReg and Scale")); |
| 168 |
|
168 |
|
| 169 |
static cl::opt PreferredAddresingMode( |
169 |
static cl::opt PreferredAddresingMode( |
| 170 |
"lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), |
170 |
"lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), |
| 171 |
cl::desc("A flag that overrides the target's preferred addressing mode."), |
171 |
cl::desc("A flag that overrides the target's preferred addressing mode."), |
| 172 |
cl::values(clEnumValN(TTI::AMK_None, |
172 |
cl::values(clEnumValN(TTI::AMK_None, |
| 173 |
"none", |
173 |
"none", |
| 174 |
"Don't prefer any addressing mode"), |
174 |
"Don't prefer any addressing mode"), |
| 175 |
clEnumValN(TTI::AMK_PreIndexed, |
175 |
clEnumValN(TTI::AMK_PreIndexed, |
| 176 |
"preindexed", |
176 |
"preindexed", |
| 177 |
"Prefer pre-indexed addressing mode"), |
177 |
"Prefer pre-indexed addressing mode"), |
| 178 |
clEnumValN(TTI::AMK_PostIndexed, |
178 |
clEnumValN(TTI::AMK_PostIndexed, |
| 179 |
"postindexed", |
179 |
"postindexed", |
| 180 |
"Prefer post-indexed addressing mode"))); |
180 |
"Prefer post-indexed addressing mode"))); |
| 181 |
|
181 |
|
| 182 |
static cl::opt ComplexityLimit( |
182 |
static cl::opt ComplexityLimit( |
| 183 |
"lsr-complexity-limit", cl::Hidden, |
183 |
"lsr-complexity-limit", cl::Hidden, |
| 184 |
cl::init(std::numeric_limits::max()), |
184 |
cl::init(std::numeric_limits::max()), |
| 185 |
cl::desc("LSR search space complexity limit")); |
185 |
cl::desc("LSR search space complexity limit")); |
| 186 |
|
186 |
|
| 187 |
static cl::opt SetupCostDepthLimit( |
187 |
static cl::opt SetupCostDepthLimit( |
| 188 |
"lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), |
188 |
"lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), |
| 189 |
cl::desc("The limit on recursion depth for LSRs setup cost")); |
189 |
cl::desc("The limit on recursion depth for LSRs setup cost")); |
| 190 |
|
190 |
|
| 191 |
static cl::opt AllowTerminatingConditionFoldingAfterLSR( |
191 |
static cl::opt AllowTerminatingConditionFoldingAfterLSR( |
| 192 |
"lsr-term-fold", cl::Hidden, cl::init(false), |
192 |
"lsr-term-fold", cl::Hidden, cl::init(false), |
| 193 |
cl::desc("Attempt to replace primary IV with other IV.")); |
193 |
cl::desc("Attempt to replace primary IV with other IV.")); |
| 194 |
|
194 |
|
| 195 |
static cl::opt AllowDropSolutionIfLessProfitable( |
195 |
static cl::opt AllowDropSolutionIfLessProfitable( |
| 196 |
"lsr-drop-solution", cl::Hidden, cl::init(false), |
196 |
"lsr-drop-solution", cl::Hidden, cl::init(false), |
| 197 |
cl::desc("Attempt to drop solution if it is less profitable")); |
197 |
cl::desc("Attempt to drop solution if it is less profitable")); |
| 198 |
|
198 |
|
| 199 |
STATISTIC(NumTermFold, |
199 |
STATISTIC(NumTermFold, |
| 200 |
"Number of terminating condition fold recognized and performed"); |
200 |
"Number of terminating condition fold recognized and performed"); |
| 201 |
|
201 |
|
| 202 |
#ifndef NDEBUG |
202 |
#ifndef NDEBUG |
| 203 |
// Stress test IV chain generation. |
203 |
// Stress test IV chain generation. |
| 204 |
static cl::opt StressIVChain( |
204 |
static cl::opt StressIVChain( |
| 205 |
"stress-ivchain", cl::Hidden, cl::init(false), |
205 |
"stress-ivchain", cl::Hidden, cl::init(false), |
| 206 |
cl::desc("Stress test LSR IV chains")); |
206 |
cl::desc("Stress test LSR IV chains")); |
| 207 |
#else |
207 |
#else |
| 208 |
static bool StressIVChain = false; |
208 |
static bool StressIVChain = false; |
| 209 |
#endif |
209 |
#endif |
| 210 |
|
210 |
|
| 211 |
namespace { |
211 |
namespace { |
| 212 |
|
212 |
|
| 213 |
struct MemAccessTy { |
213 |
struct MemAccessTy { |
| 214 |
/// Used in situations where the accessed memory type is unknown. |
214 |
/// Used in situations where the accessed memory type is unknown. |
| 215 |
static const unsigned UnknownAddressSpace = |
215 |
static const unsigned UnknownAddressSpace = |
| 216 |
std::numeric_limits::max(); |
216 |
std::numeric_limits::max(); |
| 217 |
|
217 |
|
| 218 |
Type *MemTy = nullptr; |
218 |
Type *MemTy = nullptr; |
| 219 |
unsigned AddrSpace = UnknownAddressSpace; |
219 |
unsigned AddrSpace = UnknownAddressSpace; |
| 220 |
|
220 |
|
| 221 |
MemAccessTy() = default; |
221 |
MemAccessTy() = default; |
| 222 |
MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {} |
222 |
MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {} |
| 223 |
|
223 |
|
| 224 |
bool operator==(MemAccessTy Other) const { |
224 |
bool operator==(MemAccessTy Other) const { |
| 225 |
return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace; |
225 |
return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace; |
| 226 |
} |
226 |
} |
| 227 |
|
227 |
|
| 228 |
bool operator!=(MemAccessTy Other) const { return !(*this == Other); } |
228 |
bool operator!=(MemAccessTy Other) const { return !(*this == Other); } |
| 229 |
|
229 |
|
| 230 |
static MemAccessTy getUnknown(LLVMContext &Ctx, |
230 |
static MemAccessTy getUnknown(LLVMContext &Ctx, |
| 231 |
unsigned AS = UnknownAddressSpace) { |
231 |
unsigned AS = UnknownAddressSpace) { |
| 232 |
return MemAccessTy(Type::getVoidTy(Ctx), AS); |
232 |
return MemAccessTy(Type::getVoidTy(Ctx), AS); |
| 233 |
} |
233 |
} |
| 234 |
|
234 |
|
| 235 |
Type *getType() { return MemTy; } |
235 |
Type *getType() { return MemTy; } |
| 236 |
}; |
236 |
}; |
| 237 |
|
237 |
|
| 238 |
/// This class holds data which is used to order reuse candidates. |
238 |
/// This class holds data which is used to order reuse candidates. |
| 239 |
class RegSortData { |
239 |
class RegSortData { |
| 240 |
public: |
240 |
public: |
| 241 |
/// This represents the set of LSRUse indices which reference |
241 |
/// This represents the set of LSRUse indices which reference |
| 242 |
/// a particular register. |
242 |
/// a particular register. |
| 243 |
SmallBitVector UsedByIndices; |
243 |
SmallBitVector UsedByIndices; |
| 244 |
|
244 |
|
| 245 |
void print(raw_ostream &OS) const; |
245 |
void print(raw_ostream &OS) const; |
| 246 |
void dump() const; |
246 |
void dump() const; |
| 247 |
}; |
247 |
}; |
| 248 |
|
248 |
|
| 249 |
} // end anonymous namespace |
249 |
} // end anonymous namespace |
| 250 |
|
250 |
|
| 251 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
251 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 252 |
void RegSortData::print(raw_ostream &OS) const { |
252 |
void RegSortData::print(raw_ostream &OS) const { |
| 253 |
OS << "[NumUses=" << UsedByIndices.count() << ']'; |
253 |
OS << "[NumUses=" << UsedByIndices.count() << ']'; |
| 254 |
} |
254 |
} |
| 255 |
|
255 |
|
| 256 |
LLVM_DUMP_METHOD void RegSortData::dump() const { |
256 |
LLVM_DUMP_METHOD void RegSortData::dump() const { |
| 257 |
print(errs()); errs() << '\n'; |
257 |
print(errs()); errs() << '\n'; |
| 258 |
} |
258 |
} |
| 259 |
#endif |
259 |
#endif |
| 260 |
|
260 |
|
| 261 |
namespace { |
261 |
namespace { |
| 262 |
|
262 |
|
| 263 |
/// Map register candidates to information about how they are used. |
263 |
/// Map register candidates to information about how they are used. |
| 264 |
class RegUseTracker { |
264 |
class RegUseTracker { |
| 265 |
using RegUsesTy = DenseMap; |
265 |
using RegUsesTy = DenseMap; |
| 266 |
|
266 |
|
| 267 |
RegUsesTy RegUsesMap; |
267 |
RegUsesTy RegUsesMap; |
| 268 |
SmallVector RegSequence; |
268 |
SmallVector RegSequence; |
| 269 |
|
269 |
|
| 270 |
public: |
270 |
public: |
| 271 |
void countRegister(const SCEV *Reg, size_t LUIdx); |
271 |
void countRegister(const SCEV *Reg, size_t LUIdx); |
| 272 |
void dropRegister(const SCEV *Reg, size_t LUIdx); |
272 |
void dropRegister(const SCEV *Reg, size_t LUIdx); |
| 273 |
void swapAndDropUse(size_t LUIdx, size_t LastLUIdx); |
273 |
void swapAndDropUse(size_t LUIdx, size_t LastLUIdx); |
| 274 |
|
274 |
|
| 275 |
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; |
275 |
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; |
| 276 |
|
276 |
|
| 277 |
const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; |
277 |
const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; |
| 278 |
|
278 |
|
| 279 |
void clear(); |
279 |
void clear(); |
| 280 |
|
280 |
|
| 281 |
using iterator = SmallVectorImpl::iterator; |
281 |
using iterator = SmallVectorImpl::iterator; |
| 282 |
using const_iterator = SmallVectorImpl::const_iterator; |
282 |
using const_iterator = SmallVectorImpl::const_iterator; |
| 283 |
|
283 |
|
| 284 |
iterator begin() { return RegSequence.begin(); } |
284 |
iterator begin() { return RegSequence.begin(); } |
| 285 |
iterator end() { return RegSequence.end(); } |
285 |
iterator end() { return RegSequence.end(); } |
| 286 |
const_iterator begin() const { return RegSequence.begin(); } |
286 |
const_iterator begin() const { return RegSequence.begin(); } |
| 287 |
const_iterator end() const { return RegSequence.end(); } |
287 |
const_iterator end() const { return RegSequence.end(); } |
| 288 |
}; |
288 |
}; |
| 289 |
|
289 |
|
| 290 |
} // end anonymous namespace |
290 |
} // end anonymous namespace |
| 291 |
|
291 |
|
| 292 |
void |
292 |
void |
| 293 |
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) { |
293 |
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) { |
| 294 |
std::pair Pair = |
294 |
std::pair Pair = |
| 295 |
RegUsesMap.insert(std::make_pair(Reg, RegSortData())); |
295 |
RegUsesMap.insert(std::make_pair(Reg, RegSortData())); |
| 296 |
RegSortData &RSD = Pair.first->second; |
296 |
RegSortData &RSD = Pair.first->second; |
| 297 |
if (Pair.second) |
297 |
if (Pair.second) |
| 298 |
RegSequence.push_back(Reg); |
298 |
RegSequence.push_back(Reg); |
| 299 |
RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); |
299 |
RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); |
| 300 |
RSD.UsedByIndices.set(LUIdx); |
300 |
RSD.UsedByIndices.set(LUIdx); |
| 301 |
} |
301 |
} |
| 302 |
|
302 |
|
| 303 |
void |
303 |
void |
| 304 |
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) { |
304 |
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) { |
| 305 |
RegUsesTy::iterator It = RegUsesMap.find(Reg); |
305 |
RegUsesTy::iterator It = RegUsesMap.find(Reg); |
| 306 |
assert(It != RegUsesMap.end()); |
306 |
assert(It != RegUsesMap.end()); |
| 307 |
RegSortData &RSD = It->second; |
307 |
RegSortData &RSD = It->second; |
| 308 |
assert(RSD.UsedByIndices.size() > LUIdx); |
308 |
assert(RSD.UsedByIndices.size() > LUIdx); |
| 309 |
RSD.UsedByIndices.reset(LUIdx); |
309 |
RSD.UsedByIndices.reset(LUIdx); |
| 310 |
} |
310 |
} |
| 311 |
|
311 |
|
| 312 |
void |
312 |
void |
| 313 |
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) { |
313 |
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) { |
| 314 |
assert(LUIdx <= LastLUIdx); |
314 |
assert(LUIdx <= LastLUIdx); |
| 315 |
|
315 |
|
| 316 |
// Update RegUses. The data structure is not optimized for this purpose; |
316 |
// Update RegUses. The data structure is not optimized for this purpose; |
| 317 |
// we must iterate through it and update each of the bit vectors. |
317 |
// we must iterate through it and update each of the bit vectors. |
| 318 |
for (auto &Pair : RegUsesMap) { |
318 |
for (auto &Pair : RegUsesMap) { |
| 319 |
SmallBitVector &UsedByIndices = Pair.second.UsedByIndices; |
319 |
SmallBitVector &UsedByIndices = Pair.second.UsedByIndices; |
| 320 |
if (LUIdx < UsedByIndices.size()) |
320 |
if (LUIdx < UsedByIndices.size()) |
| 321 |
UsedByIndices[LUIdx] = |
321 |
UsedByIndices[LUIdx] = |
| 322 |
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false; |
322 |
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false; |
| 323 |
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx)); |
323 |
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx)); |
| 324 |
} |
324 |
} |
| 325 |
} |
325 |
} |
| 326 |
|
326 |
|
| 327 |
bool |
327 |
bool |
| 328 |
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { |
328 |
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { |
| 329 |
RegUsesTy::const_iterator I = RegUsesMap.find(Reg); |
329 |
RegUsesTy::const_iterator I = RegUsesMap.find(Reg); |
| 330 |
if (I == RegUsesMap.end()) |
330 |
if (I == RegUsesMap.end()) |
| 331 |
return false; |
331 |
return false; |
| 332 |
const SmallBitVector &UsedByIndices = I->second.UsedByIndices; |
332 |
const SmallBitVector &UsedByIndices = I->second.UsedByIndices; |
| 333 |
int i = UsedByIndices.find_first(); |
333 |
int i = UsedByIndices.find_first(); |
| 334 |
if (i == -1) return false; |
334 |
if (i == -1) return false; |
| 335 |
if ((size_t)i != LUIdx) return true; |
335 |
if ((size_t)i != LUIdx) return true; |
| 336 |
return UsedByIndices.find_next(i) != -1; |
336 |
return UsedByIndices.find_next(i) != -1; |
| 337 |
} |
337 |
} |
| 338 |
|
338 |
|
| 339 |
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { |
339 |
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { |
| 340 |
RegUsesTy::const_iterator I = RegUsesMap.find(Reg); |
340 |
RegUsesTy::const_iterator I = RegUsesMap.find(Reg); |
| 341 |
assert(I != RegUsesMap.end() && "Unknown register!"); |
341 |
assert(I != RegUsesMap.end() && "Unknown register!"); |
| 342 |
return I->second.UsedByIndices; |
342 |
return I->second.UsedByIndices; |
| 343 |
} |
343 |
} |
| 344 |
|
344 |
|
| 345 |
void RegUseTracker::clear() { |
345 |
void RegUseTracker::clear() { |
| 346 |
RegUsesMap.clear(); |
346 |
RegUsesMap.clear(); |
| 347 |
RegSequence.clear(); |
347 |
RegSequence.clear(); |
| 348 |
} |
348 |
} |
| 349 |
|
349 |
|
| 350 |
namespace { |
350 |
namespace { |
| 351 |
|
351 |
|
| 352 |
/// This class holds information that describes a formula for computing |
352 |
/// This class holds information that describes a formula for computing |
| 353 |
/// satisfying a use. It may include broken-out immediates and scaled registers. |
353 |
/// satisfying a use. It may include broken-out immediates and scaled registers. |
| 354 |
struct Formula { |
354 |
struct Formula { |
| 355 |
/// Global base address used for complex addressing. |
355 |
/// Global base address used for complex addressing. |
| 356 |
GlobalValue *BaseGV = nullptr; |
356 |
GlobalValue *BaseGV = nullptr; |
| 357 |
|
357 |
|
| 358 |
/// Base offset for complex addressing. |
358 |
/// Base offset for complex addressing. |
| 359 |
int64_t BaseOffset = 0; |
359 |
int64_t BaseOffset = 0; |
| 360 |
|
360 |
|
| 361 |
/// Whether any complex addressing has a base register. |
361 |
/// Whether any complex addressing has a base register. |
| 362 |
bool HasBaseReg = false; |
362 |
bool HasBaseReg = false; |
| 363 |
|
363 |
|
| 364 |
/// The scale of any complex addressing. |
364 |
/// The scale of any complex addressing. |
| 365 |
int64_t Scale = 0; |
365 |
int64_t Scale = 0; |
| 366 |
|
366 |
|
| 367 |
/// The list of "base" registers for this use. When this is non-empty. The |
367 |
/// The list of "base" registers for this use. When this is non-empty. The |
| 368 |
/// canonical representation of a formula is |
368 |
/// canonical representation of a formula is |
| 369 |
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and |
369 |
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and |
| 370 |
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). |
370 |
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). |
| 371 |
/// 3. The reg containing recurrent expr related with currect loop in the |
371 |
/// 3. The reg containing recurrent expr related with currect loop in the |
| 372 |
/// formula should be put in the ScaledReg. |
372 |
/// formula should be put in the ScaledReg. |
| 373 |
/// #1 enforces that the scaled register is always used when at least two |
373 |
/// #1 enforces that the scaled register is always used when at least two |
| 374 |
/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. |
374 |
/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. |
| 375 |
/// #2 enforces that 1 * reg is reg. |
375 |
/// #2 enforces that 1 * reg is reg. |
| 376 |
/// #3 ensures invariant regs with respect to current loop can be combined |
376 |
/// #3 ensures invariant regs with respect to current loop can be combined |
| 377 |
/// together in LSR codegen. |
377 |
/// together in LSR codegen. |
| 378 |
/// This invariant can be temporarily broken while building a formula. |
378 |
/// This invariant can be temporarily broken while building a formula. |
| 379 |
/// However, every formula inserted into the LSRInstance must be in canonical |
379 |
/// However, every formula inserted into the LSRInstance must be in canonical |
| 380 |
/// form. |
380 |
/// form. |
| 381 |
SmallVector BaseRegs; |
381 |
SmallVector BaseRegs; |
| 382 |
|
382 |
|
| 383 |
/// The 'scaled' register for this use. This should be non-null when Scale is |
383 |
/// The 'scaled' register for this use. This should be non-null when Scale is |
| 384 |
/// not zero. |
384 |
/// not zero. |
| 385 |
const SCEV *ScaledReg = nullptr; |
385 |
const SCEV *ScaledReg = nullptr; |
| 386 |
|
386 |
|
| 387 |
/// An additional constant offset which added near the use. This requires a |
387 |
/// An additional constant offset which added near the use. This requires a |
| 388 |
/// temporary register, but the offset itself can live in an add immediate |
388 |
/// temporary register, but the offset itself can live in an add immediate |
| 389 |
/// field rather than a register. |
389 |
/// field rather than a register. |
| 390 |
int64_t UnfoldedOffset = 0; |
390 |
int64_t UnfoldedOffset = 0; |
| 391 |
|
391 |
|
| 392 |
Formula() = default; |
392 |
Formula() = default; |
| 393 |
|
393 |
|
| 394 |
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); |
394 |
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); |
| 395 |
|
395 |
|
| 396 |
bool isCanonical(const Loop &L) const; |
396 |
bool isCanonical(const Loop &L) const; |
| 397 |
|
397 |
|
| 398 |
void canonicalize(const Loop &L); |
398 |
void canonicalize(const Loop &L); |
| 399 |
|
399 |
|
| 400 |
bool unscale(); |
400 |
bool unscale(); |
| 401 |
|
401 |
|
| 402 |
bool hasZeroEnd() const; |
402 |
bool hasZeroEnd() const; |
| 403 |
|
403 |
|
| 404 |
size_t getNumRegs() const; |
404 |
size_t getNumRegs() const; |
| 405 |
Type *getType() const; |
405 |
Type *getType() const; |
| 406 |
|
406 |
|
| 407 |
void deleteBaseReg(const SCEV *&S); |
407 |
void deleteBaseReg(const SCEV *&S); |
| 408 |
|
408 |
|
| 409 |
bool referencesReg(const SCEV *S) const; |
409 |
bool referencesReg(const SCEV *S) const; |
| 410 |
bool hasRegsUsedByUsesOtherThan(size_t LUIdx, |
410 |
bool hasRegsUsedByUsesOtherThan(size_t LUIdx, |
| 411 |
const RegUseTracker &RegUses) const; |
411 |
const RegUseTracker &RegUses) const; |
| 412 |
|
412 |
|
| 413 |
void print(raw_ostream &OS) const; |
413 |
void print(raw_ostream &OS) const; |
| 414 |
void dump() const; |
414 |
void dump() const; |
| 415 |
}; |
415 |
}; |
| 416 |
|
416 |
|
| 417 |
} // end anonymous namespace |
417 |
} // end anonymous namespace |
| 418 |
|
418 |
|
| 419 |
/// Recursion helper for initialMatch. |
419 |
/// Recursion helper for initialMatch. |
| 420 |
static void DoInitialMatch(const SCEV *S, Loop *L, |
420 |
static void DoInitialMatch(const SCEV *S, Loop *L, |
| 421 |
SmallVectorImpl &Good, |
421 |
SmallVectorImpl &Good, |
| 422 |
SmallVectorImpl &Bad, |
422 |
SmallVectorImpl &Bad, |
| 423 |
ScalarEvolution &SE) { |
423 |
ScalarEvolution &SE) { |
| 424 |
// Collect expressions which properly dominate the loop header. |
424 |
// Collect expressions which properly dominate the loop header. |
| 425 |
if (SE.properlyDominates(S, L->getHeader())) { |
425 |
if (SE.properlyDominates(S, L->getHeader())) { |
| 426 |
Good.push_back(S); |
426 |
Good.push_back(S); |
| 427 |
return; |
427 |
return; |
| 428 |
} |
428 |
} |
| 429 |
|
429 |
|
| 430 |
// Look at add operands. |
430 |
// Look at add operands. |
| 431 |
if (const SCEVAddExpr *Add = dyn_cast(S)) { |
431 |
if (const SCEVAddExpr *Add = dyn_cast(S)) { |
| 432 |
for (const SCEV *S : Add->operands()) |
432 |
for (const SCEV *S : Add->operands()) |
| 433 |
DoInitialMatch(S, L, Good, Bad, SE); |
433 |
DoInitialMatch(S, L, Good, Bad, SE); |
| 434 |
return; |
434 |
return; |
| 435 |
} |
435 |
} |
| 436 |
|
436 |
|
| 437 |
// Look at addrec operands. |
437 |
// Look at addrec operands. |
| 438 |
if (const SCEVAddRecExpr *AR = dyn_cast(S)) |
438 |
if (const SCEVAddRecExpr *AR = dyn_cast(S)) |
| 439 |
if (!AR->getStart()->isZero() && AR->isAffine()) { |
439 |
if (!AR->getStart()->isZero() && AR->isAffine()) { |
| 440 |
DoInitialMatch(AR->getStart(), L, Good, Bad, SE); |
440 |
DoInitialMatch(AR->getStart(), L, Good, Bad, SE); |
| 441 |
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), |
441 |
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), |
| 442 |
AR->getStepRecurrence(SE), |
442 |
AR->getStepRecurrence(SE), |
| 443 |
// FIXME: AR->getNoWrapFlags() |
443 |
// FIXME: AR->getNoWrapFlags() |
| 444 |
AR->getLoop(), SCEV::FlagAnyWrap), |
444 |
AR->getLoop(), SCEV::FlagAnyWrap), |
| 445 |
L, Good, Bad, SE); |
445 |
L, Good, Bad, SE); |
| 446 |
return; |
446 |
return; |
| 447 |
} |
447 |
} |
| 448 |
|
448 |
|
| 449 |
// Handle a multiplication by -1 (negation) if it didn't fold. |
449 |
// Handle a multiplication by -1 (negation) if it didn't fold. |
| 450 |
if (const SCEVMulExpr *Mul = dyn_cast(S)) |
450 |
if (const SCEVMulExpr *Mul = dyn_cast(S)) |
| 451 |
if (Mul->getOperand(0)->isAllOnesValue()) { |
451 |
if (Mul->getOperand(0)->isAllOnesValue()) { |
| 452 |
SmallVector Ops(drop_begin(Mul->operands())); |
452 |
SmallVector Ops(drop_begin(Mul->operands())); |
| 453 |
const SCEV *NewMul = SE.getMulExpr(Ops); |
453 |
const SCEV *NewMul = SE.getMulExpr(Ops); |
| 454 |
|
454 |
|
| 455 |
SmallVector MyGood; |
455 |
SmallVector MyGood; |
| 456 |
SmallVector MyBad; |
456 |
SmallVector MyBad; |
| 457 |
DoInitialMatch(NewMul, L, MyGood, MyBad, SE); |
457 |
DoInitialMatch(NewMul, L, MyGood, MyBad, SE); |
| 458 |
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( |
458 |
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( |
| 459 |
SE.getEffectiveSCEVType(NewMul->getType()))); |
459 |
SE.getEffectiveSCEVType(NewMul->getType()))); |
| 460 |
for (const SCEV *S : MyGood) |
460 |
for (const SCEV *S : MyGood) |
| 461 |
Good.push_back(SE.getMulExpr(NegOne, S)); |
461 |
Good.push_back(SE.getMulExpr(NegOne, S)); |
| 462 |
for (const SCEV *S : MyBad) |
462 |
for (const SCEV *S : MyBad) |
| 463 |
Bad.push_back(SE.getMulExpr(NegOne, S)); |
463 |
Bad.push_back(SE.getMulExpr(NegOne, S)); |
| 464 |
return; |
464 |
return; |
| 465 |
} |
465 |
} |
| 466 |
|
466 |
|
| 467 |
// Ok, we can't do anything interesting. Just stuff the whole thing into a |
467 |
// Ok, we can't do anything interesting. Just stuff the whole thing into a |
| 468 |
// register and hope for the best. |
468 |
// register and hope for the best. |
| 469 |
Bad.push_back(S); |
469 |
Bad.push_back(S); |
| 470 |
} |
470 |
} |
| 471 |
|
471 |
|
| 472 |
/// Incorporate loop-variant parts of S into this Formula, attempting to keep |
472 |
/// Incorporate loop-variant parts of S into this Formula, attempting to keep |
| 473 |
/// all loop-invariant and loop-computable values in a single base register. |
473 |
/// all loop-invariant and loop-computable values in a single base register. |
| 474 |
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { |
474 |
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { |
| 475 |
SmallVector Good; |
475 |
SmallVector Good; |
| 476 |
SmallVector Bad; |
476 |
SmallVector Bad; |
| 477 |
DoInitialMatch(S, L, Good, Bad, SE); |
477 |
DoInitialMatch(S, L, Good, Bad, SE); |
| 478 |
if (!Good.empty()) { |
478 |
if (!Good.empty()) { |
| 479 |
const SCEV *Sum = SE.getAddExpr(Good); |
479 |
const SCEV *Sum = SE.getAddExpr(Good); |
| 480 |
if (!Sum->isZero()) |
480 |
if (!Sum->isZero()) |
| 481 |
BaseRegs.push_back(Sum); |
481 |
BaseRegs.push_back(Sum); |
| 482 |
HasBaseReg = true; |
482 |
HasBaseReg = true; |
| 483 |
} |
483 |
} |
| 484 |
if (!Bad.empty()) { |
484 |
if (!Bad.empty()) { |
| 485 |
const SCEV *Sum = SE.getAddExpr(Bad); |
485 |
const SCEV *Sum = SE.getAddExpr(Bad); |
| 486 |
if (!Sum->isZero()) |
486 |
if (!Sum->isZero()) |
| 487 |
BaseRegs.push_back(Sum); |
487 |
BaseRegs.push_back(Sum); |
| 488 |
HasBaseReg = true; |
488 |
HasBaseReg = true; |
| 489 |
} |
489 |
} |
| 490 |
canonicalize(*L); |
490 |
canonicalize(*L); |
| 491 |
} |
491 |
} |
| 492 |
|
492 |
|
| 493 |
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) { |
493 |
static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) { |
| 494 |
return SCEVExprContains(S, [&L](const SCEV *S) { |
494 |
return SCEVExprContains(S, [&L](const SCEV *S) { |
| 495 |
return isa(S) && (cast(S)->getLoop() == &L); |
495 |
return isa(S) && (cast(S)->getLoop() == &L); |
| 496 |
}); |
496 |
}); |
| 497 |
} |
497 |
} |
| 498 |
|
498 |
|
| 499 |
/// Check whether or not this formula satisfies the canonical |
499 |
/// Check whether or not this formula satisfies the canonical |
| 500 |
/// representation. |
500 |
/// representation. |
| 501 |
/// \see Formula::BaseRegs. |
501 |
/// \see Formula::BaseRegs. |
| 502 |
bool Formula::isCanonical(const Loop &L) const { |
502 |
bool Formula::isCanonical(const Loop &L) const { |
| 503 |
if (!ScaledReg) |
503 |
if (!ScaledReg) |
| 504 |
return BaseRegs.size() <= 1; |
504 |
return BaseRegs.size() <= 1; |
| 505 |
|
505 |
|
| 506 |
if (Scale != 1) |
506 |
if (Scale != 1) |
| 507 |
return true; |
507 |
return true; |
| 508 |
|
508 |
|
| 509 |
if (Scale == 1 && BaseRegs.empty()) |
509 |
if (Scale == 1 && BaseRegs.empty()) |
| 510 |
return false; |
510 |
return false; |
| 511 |
|
511 |
|
| 512 |
if (containsAddRecDependentOnLoop(ScaledReg, L)) |
512 |
if (containsAddRecDependentOnLoop(ScaledReg, L)) |
| 513 |
return true; |
513 |
return true; |
| 514 |
|
514 |
|
| 515 |
// If ScaledReg is not a recurrent expr, or it is but its loop is not current |
515 |
// If ScaledReg is not a recurrent expr, or it is but its loop is not current |
| 516 |
// loop, meanwhile BaseRegs contains a recurrent expr reg related with current |
516 |
// loop, meanwhile BaseRegs contains a recurrent expr reg related with current |
| 517 |
// loop, we want to swap the reg in BaseRegs with ScaledReg. |
517 |
// loop, we want to swap the reg in BaseRegs with ScaledReg. |
| 518 |
return none_of(BaseRegs, [&L](const SCEV *S) { |
518 |
return none_of(BaseRegs, [&L](const SCEV *S) { |
| 519 |
return containsAddRecDependentOnLoop(S, L); |
519 |
return containsAddRecDependentOnLoop(S, L); |
| 520 |
}); |
520 |
}); |
| 521 |
} |
521 |
} |
| 522 |
|
522 |
|
| 523 |
/// Helper method to morph a formula into its canonical representation. |
523 |
/// Helper method to morph a formula into its canonical representation. |
| 524 |
/// \see Formula::BaseRegs. |
524 |
/// \see Formula::BaseRegs. |
| 525 |
/// Every formula having more than one base register, must use the ScaledReg |
525 |
/// Every formula having more than one base register, must use the ScaledReg |
| 526 |
/// field. Otherwise, we would have to do special cases everywhere in LSR |
526 |
/// field. Otherwise, we would have to do special cases everywhere in LSR |
| 527 |
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... |
527 |
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... |
| 528 |
/// On the other hand, 1*reg should be canonicalized into reg. |
528 |
/// On the other hand, 1*reg should be canonicalized into reg. |
| 529 |
void Formula::canonicalize(const Loop &L) { |
529 |
void Formula::canonicalize(const Loop &L) { |
| 530 |
if (isCanonical(L)) |
530 |
if (isCanonical(L)) |
| 531 |
return; |
531 |
return; |
| 532 |
|
532 |
|
| 533 |
if (BaseRegs.empty()) { |
533 |
if (BaseRegs.empty()) { |
| 534 |
// No base reg? Use scale reg with scale = 1 as such. |
534 |
// No base reg? Use scale reg with scale = 1 as such. |
| 535 |
assert(ScaledReg && "Expected 1*reg => reg"); |
535 |
assert(ScaledReg && "Expected 1*reg => reg"); |
| 536 |
assert(Scale == 1 && "Expected 1*reg => reg"); |
536 |
assert(Scale == 1 && "Expected 1*reg => reg"); |
| 537 |
BaseRegs.push_back(ScaledReg); |
537 |
BaseRegs.push_back(ScaledReg); |
| 538 |
Scale = 0; |
538 |
Scale = 0; |
| 539 |
ScaledReg = nullptr; |
539 |
ScaledReg = nullptr; |
| 540 |
return; |
540 |
return; |
| 541 |
} |
541 |
} |
| 542 |
|
542 |
|
| 543 |
// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. |
543 |
// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. |
| 544 |
if (!ScaledReg) { |
544 |
if (!ScaledReg) { |
| 545 |
ScaledReg = BaseRegs.pop_back_val(); |
545 |
ScaledReg = BaseRegs.pop_back_val(); |
| 546 |
Scale = 1; |
546 |
Scale = 1; |
| 547 |
} |
547 |
} |
| 548 |
|
548 |
|
| 549 |
// If ScaledReg is an invariant with respect to L, find the reg from |
549 |
// If ScaledReg is an invariant with respect to L, find the reg from |
| 550 |
// BaseRegs containing the recurrent expr related with Loop L. Swap the |
550 |
// BaseRegs containing the recurrent expr related with Loop L. Swap the |
| 551 |
// reg with ScaledReg. |
551 |
// reg with ScaledReg. |
| 552 |
if (!containsAddRecDependentOnLoop(ScaledReg, L)) { |
552 |
if (!containsAddRecDependentOnLoop(ScaledReg, L)) { |
| 553 |
auto I = find_if(BaseRegs, [&L](const SCEV *S) { |
553 |
auto I = find_if(BaseRegs, [&L](const SCEV *S) { |
| 554 |
return containsAddRecDependentOnLoop(S, L); |
554 |
return containsAddRecDependentOnLoop(S, L); |
| 555 |
}); |
555 |
}); |
| 556 |
if (I != BaseRegs.end()) |
556 |
if (I != BaseRegs.end()) |
| 557 |
std::swap(ScaledReg, *I); |
557 |
std::swap(ScaledReg, *I); |
| 558 |
} |
558 |
} |
| 559 |
assert(isCanonical(L) && "Failed to canonicalize?"); |
559 |
assert(isCanonical(L) && "Failed to canonicalize?"); |
| 560 |
} |
560 |
} |
| 561 |
|
561 |
|
| 562 |
/// Get rid of the scale in the formula. |
562 |
/// Get rid of the scale in the formula. |
| 563 |
/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. |
563 |
/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. |
| 564 |
/// \return true if it was possible to get rid of the scale, false otherwise. |
564 |
/// \return true if it was possible to get rid of the scale, false otherwise. |
| 565 |
/// \note After this operation the formula may not be in the canonical form. |
565 |
/// \note After this operation the formula may not be in the canonical form. |
| 566 |
bool Formula::unscale() { |
566 |
bool Formula::unscale() { |
| 567 |
if (Scale != 1) |
567 |
if (Scale != 1) |
| 568 |
return false; |
568 |
return false; |
| 569 |
Scale = 0; |
569 |
Scale = 0; |
| 570 |
BaseRegs.push_back(ScaledReg); |
570 |
BaseRegs.push_back(ScaledReg); |
| 571 |
ScaledReg = nullptr; |
571 |
ScaledReg = nullptr; |
| 572 |
return true; |
572 |
return true; |
| 573 |
} |
573 |
} |
| 574 |
|
574 |
|
| 575 |
bool Formula::hasZeroEnd() const { |
575 |
bool Formula::hasZeroEnd() const { |
| 576 |
if (UnfoldedOffset || BaseOffset) |
576 |
if (UnfoldedOffset || BaseOffset) |
| 577 |
return false; |
577 |
return false; |
| 578 |
if (BaseRegs.size() != 1 || ScaledReg) |
578 |
if (BaseRegs.size() != 1 || ScaledReg) |
| 579 |
return false; |
579 |
return false; |
| 580 |
return true; |
580 |
return true; |
| 581 |
} |
581 |
} |
| 582 |
|
582 |
|
| 583 |
/// Return the total number of register operands used by this formula. This does |
583 |
/// Return the total number of register operands used by this formula. This does |
| 584 |
/// not include register uses implied by non-constant addrec strides. |
584 |
/// not include register uses implied by non-constant addrec strides. |
| 585 |
size_t Formula::getNumRegs() const { |
585 |
size_t Formula::getNumRegs() const { |
| 586 |
return !!ScaledReg + BaseRegs.size(); |
586 |
return !!ScaledReg + BaseRegs.size(); |
| 587 |
} |
587 |
} |
| 588 |
|
588 |
|
| 589 |
/// Return the type of this formula, if it has one, or null otherwise. This type |
589 |
/// Return the type of this formula, if it has one, or null otherwise. This type |
| 590 |
/// is meaningless except for the bit size. |
590 |
/// is meaningless except for the bit size. |
| 591 |
Type *Formula::getType() const { |
591 |
Type *Formula::getType() const { |
| 592 |
return !BaseRegs.empty() ? BaseRegs.front()->getType() : |
592 |
return !BaseRegs.empty() ? BaseRegs.front()->getType() : |
| 593 |
ScaledReg ? ScaledReg->getType() : |
593 |
ScaledReg ? ScaledReg->getType() : |
| 594 |
BaseGV ? BaseGV->getType() : |
594 |
BaseGV ? BaseGV->getType() : |
| 595 |
nullptr; |
595 |
nullptr; |
| 596 |
} |
596 |
} |
| 597 |
|
597 |
|
| 598 |
/// Delete the given base reg from the BaseRegs list. |
598 |
/// Delete the given base reg from the BaseRegs list. |
| 599 |
void Formula::deleteBaseReg(const SCEV *&S) { |
599 |
void Formula::deleteBaseReg(const SCEV *&S) { |
| 600 |
if (&S != &BaseRegs.back()) |
600 |
if (&S != &BaseRegs.back()) |
| 601 |
std::swap(S, BaseRegs.back()); |
601 |
std::swap(S, BaseRegs.back()); |
| 602 |
BaseRegs.pop_back(); |
602 |
BaseRegs.pop_back(); |
| 603 |
} |
603 |
} |
| 604 |
|
604 |
|
| 605 |
/// Test if this formula references the given register. |
605 |
/// Test if this formula references the given register. |
| 606 |
bool Formula::referencesReg(const SCEV *S) const { |
606 |
bool Formula::referencesReg(const SCEV *S) const { |
| 607 |
return S == ScaledReg || is_contained(BaseRegs, S); |
607 |
return S == ScaledReg || is_contained(BaseRegs, S); |
| 608 |
} |
608 |
} |
| 609 |
|
609 |
|
| 610 |
/// Test whether this formula uses registers which are used by uses other than |
610 |
/// Test whether this formula uses registers which are used by uses other than |
| 611 |
/// the use with the given index. |
611 |
/// the use with the given index. |
| 612 |
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, |
612 |
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, |
| 613 |
const RegUseTracker &RegUses) const { |
613 |
const RegUseTracker &RegUses) const { |
| 614 |
if (ScaledReg) |
614 |
if (ScaledReg) |
| 615 |
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) |
615 |
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) |
| 616 |
return true; |
616 |
return true; |
| 617 |
for (const SCEV *BaseReg : BaseRegs) |
617 |
for (const SCEV *BaseReg : BaseRegs) |
| 618 |
if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx)) |
618 |
if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx)) |
| 619 |
return true; |
619 |
return true; |
| 620 |
return false; |
620 |
return false; |
| 621 |
} |
621 |
} |
| 622 |
|
622 |
|
| 623 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
623 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 624 |
void Formula::print(raw_ostream &OS) const { |
624 |
void Formula::print(raw_ostream &OS) const { |
| 625 |
bool First = true; |
625 |
bool First = true; |
| 626 |
if (BaseGV) { |
626 |
if (BaseGV) { |
| 627 |
if (!First) OS << " + "; else First = false; |
627 |
if (!First) OS << " + "; else First = false; |
| 628 |
BaseGV->printAsOperand(OS, /*PrintType=*/false); |
628 |
BaseGV->printAsOperand(OS, /*PrintType=*/false); |
| 629 |
} |
629 |
} |
| 630 |
if (BaseOffset != 0) { |
630 |
if (BaseOffset != 0) { |
| 631 |
if (!First) OS << " + "; else First = false; |
631 |
if (!First) OS << " + "; else First = false; |
| 632 |
OS << BaseOffset; |
632 |
OS << BaseOffset; |
| 633 |
} |
633 |
} |
| 634 |
for (const SCEV *BaseReg : BaseRegs) { |
634 |
for (const SCEV *BaseReg : BaseRegs) { |
| 635 |
if (!First) OS << " + "; else First = false; |
635 |
if (!First) OS << " + "; else First = false; |
| 636 |
OS << "reg(" << *BaseReg << ')'; |
636 |
OS << "reg(" << *BaseReg << ')'; |
| 637 |
} |
637 |
} |
| 638 |
if (HasBaseReg && BaseRegs.empty()) { |
638 |
if (HasBaseReg && BaseRegs.empty()) { |
| 639 |
if (!First) OS << " + "; else First = false; |
639 |
if (!First) OS << " + "; else First = false; |
| 640 |
OS << "**error: HasBaseReg**"; |
640 |
OS << "**error: HasBaseReg**"; |
| 641 |
} else if (!HasBaseReg && !BaseRegs.empty()) { |
641 |
} else if (!HasBaseReg && !BaseRegs.empty()) { |
| 642 |
if (!First) OS << " + "; else First = false; |
642 |
if (!First) OS << " + "; else First = false; |
| 643 |
OS << "**error: !HasBaseReg**"; |
643 |
OS << "**error: !HasBaseReg**"; |
| 644 |
} |
644 |
} |
| 645 |
if (Scale != 0) { |
645 |
if (Scale != 0) { |
| 646 |
if (!First) OS << " + "; else First = false; |
646 |
if (!First) OS << " + "; else First = false; |
| 647 |
OS << Scale << "*reg("; |
647 |
OS << Scale << "*reg("; |
| 648 |
if (ScaledReg) |
648 |
if (ScaledReg) |
| 649 |
OS << *ScaledReg; |
649 |
OS << *ScaledReg; |
| 650 |
else |
650 |
else |
| 651 |
OS << ""; |
651 |
OS << ""; |
| 652 |
OS << ')'; |
652 |
OS << ')'; |
| 653 |
} |
653 |
} |
| 654 |
if (UnfoldedOffset != 0) { |
654 |
if (UnfoldedOffset != 0) { |
| 655 |
if (!First) OS << " + "; |
655 |
if (!First) OS << " + "; |
| 656 |
OS << "imm(" << UnfoldedOffset << ')'; |
656 |
OS << "imm(" << UnfoldedOffset << ')'; |
| 657 |
} |
657 |
} |
| 658 |
} |
658 |
} |
| 659 |
|
659 |
|
| 660 |
LLVM_DUMP_METHOD void Formula::dump() const { |
660 |
LLVM_DUMP_METHOD void Formula::dump() const { |
| 661 |
print(errs()); errs() << '\n'; |
661 |
print(errs()); errs() << '\n'; |
| 662 |
} |
662 |
} |
| 663 |
#endif |
663 |
#endif |
| 664 |
|
664 |
|
| 665 |
/// Return true if the given addrec can be sign-extended without changing its |
665 |
/// Return true if the given addrec can be sign-extended without changing its |
| 666 |
/// value. |
666 |
/// value. |
| 667 |
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { |
667 |
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { |
| 668 |
Type *WideTy = |
668 |
Type *WideTy = |
| 669 |
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); |
669 |
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); |
| 670 |
return isa(SE.getSignExtendExpr(AR, WideTy)); |
670 |
return isa(SE.getSignExtendExpr(AR, WideTy)); |
| 671 |
} |
671 |
} |
| 672 |
|
672 |
|
| 673 |
/// Return true if the given add can be sign-extended without changing its |
673 |
/// Return true if the given add can be sign-extended without changing its |
| 674 |
/// value. |
674 |
/// value. |
| 675 |
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { |
675 |
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { |
| 676 |
Type *WideTy = |
676 |
Type *WideTy = |
| 677 |
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); |
677 |
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); |
| 678 |
return isa(SE.getSignExtendExpr(A, WideTy)); |
678 |
return isa(SE.getSignExtendExpr(A, WideTy)); |
| 679 |
} |
679 |
} |
| 680 |
|
680 |
|
| 681 |
/// Return true if the given mul can be sign-extended without changing its |
681 |
/// Return true if the given mul can be sign-extended without changing its |
| 682 |
/// value. |
682 |
/// value. |
| 683 |
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { |
683 |
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { |
| 684 |
Type *WideTy = |
684 |
Type *WideTy = |
| 685 |
IntegerType::get(SE.getContext(), |
685 |
IntegerType::get(SE.getContext(), |
| 686 |
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); |
686 |
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands()); |
| 687 |
return isa(SE.getSignExtendExpr(M, WideTy)); |
687 |
return isa(SE.getSignExtendExpr(M, WideTy)); |
| 688 |
} |
688 |
} |
| 689 |
|
689 |
|
| 690 |
/// Return an expression for LHS /s RHS, if it can be determined and if the |
690 |
/// Return an expression for LHS /s RHS, if it can be determined and if the |
| 691 |
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits |
691 |
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits |
| 692 |
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that |
692 |
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that |
| 693 |
/// the multiplication may overflow, which is useful when the result will be |
693 |
/// the multiplication may overflow, which is useful when the result will be |
| 694 |
/// used in a context where the most significant bits are ignored. |
694 |
/// used in a context where the most significant bits are ignored. |
| 695 |
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, |
695 |
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, |
| 696 |
ScalarEvolution &SE, |
696 |
ScalarEvolution &SE, |
| 697 |
bool IgnoreSignificantBits = false) { |
697 |
bool IgnoreSignificantBits = false) { |
| 698 |
// Handle the trivial case, which works for any SCEV type. |
698 |
// Handle the trivial case, which works for any SCEV type. |
| 699 |
if (LHS == RHS) |
699 |
if (LHS == RHS) |
| 700 |
return SE.getConstant(LHS->getType(), 1); |
700 |
return SE.getConstant(LHS->getType(), 1); |
| 701 |
|
701 |
|
| 702 |
// Handle a few RHS special cases. |
702 |
// Handle a few RHS special cases. |
| 703 |
const SCEVConstant *RC = dyn_cast(RHS); |
703 |
const SCEVConstant *RC = dyn_cast(RHS); |
| 704 |
if (RC) { |
704 |
if (RC) { |
| 705 |
const APInt &RA = RC->getAPInt(); |
705 |
const APInt &RA = RC->getAPInt(); |
| 706 |
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do |
706 |
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do |
| 707 |
// some folding. |
707 |
// some folding. |
| 708 |
if (RA.isAllOnes()) { |
708 |
if (RA.isAllOnes()) { |
| 709 |
if (LHS->getType()->isPointerTy()) |
709 |
if (LHS->getType()->isPointerTy()) |
| 710 |
return nullptr; |
710 |
return nullptr; |
| 711 |
return SE.getMulExpr(LHS, RC); |
711 |
return SE.getMulExpr(LHS, RC); |
| 712 |
} |
712 |
} |
| 713 |
// Handle x /s 1 as x. |
713 |
// Handle x /s 1 as x. |
| 714 |
if (RA == 1) |
714 |
if (RA == 1) |
| 715 |
return LHS; |
715 |
return LHS; |
| 716 |
} |
716 |
} |
| 717 |
|
717 |
|
| 718 |
// Check for a division of a constant by a constant. |
718 |
// Check for a division of a constant by a constant. |
| 719 |
if (const SCEVConstant *C = dyn_cast(LHS)) { |
719 |
if (const SCEVConstant *C = dyn_cast(LHS)) { |
| 720 |
if (!RC) |
720 |
if (!RC) |
| 721 |
return nullptr; |
721 |
return nullptr; |
| 722 |
const APInt &LA = C->getAPInt(); |
722 |
const APInt &LA = C->getAPInt(); |
| 723 |
const APInt &RA = RC->getAPInt(); |
723 |
const APInt &RA = RC->getAPInt(); |
| 724 |
if (LA.srem(RA) != 0) |
724 |
if (LA.srem(RA) != 0) |
| 725 |
return nullptr; |
725 |
return nullptr; |
| 726 |
return SE.getConstant(LA.sdiv(RA)); |
726 |
return SE.getConstant(LA.sdiv(RA)); |
| 727 |
} |
727 |
} |
| 728 |
|
728 |
|
| 729 |
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow. |
729 |
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow. |
| 730 |
if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) { |
730 |
if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) { |
| 731 |
if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) { |
731 |
if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) { |
| 732 |
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, |
732 |
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, |
| 733 |
IgnoreSignificantBits); |
733 |
IgnoreSignificantBits); |
| 734 |
if (!Step) return nullptr; |
734 |
if (!Step) return nullptr; |
| 735 |
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, |
735 |
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, |
| 736 |
IgnoreSignificantBits); |
736 |
IgnoreSignificantBits); |
| 737 |
if (!Start) return nullptr; |
737 |
if (!Start) return nullptr; |
| 738 |
// FlagNW is independent of the start value, step direction, and is |
738 |
// FlagNW is independent of the start value, step direction, and is |
| 739 |
// preserved with smaller magnitude steps. |
739 |
// preserved with smaller magnitude steps. |
| 740 |
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
740 |
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
| 741 |
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); |
741 |
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); |
| 742 |
} |
742 |
} |
| 743 |
return nullptr; |
743 |
return nullptr; |
| 744 |
} |
744 |
} |
| 745 |
|
745 |
|
| 746 |
// Distribute the sdiv over add operands, if the add doesn't overflow. |
746 |
// Distribute the sdiv over add operands, if the add doesn't overflow. |
| 747 |
if (const SCEVAddExpr *Add = dyn_cast(LHS)) { |
747 |
if (const SCEVAddExpr *Add = dyn_cast(LHS)) { |
| 748 |
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) { |
748 |
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) { |
| 749 |
SmallVector Ops; |
749 |
SmallVector Ops; |
| 750 |
for (const SCEV *S : Add->operands()) { |
750 |
for (const SCEV *S : Add->operands()) { |
| 751 |
const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits); |
751 |
const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits); |
| 752 |
if (!Op) return nullptr; |
752 |
if (!Op) return nullptr; |
| 753 |
Ops.push_back(Op); |
753 |
Ops.push_back(Op); |
| 754 |
} |
754 |
} |
| 755 |
return SE.getAddExpr(Ops); |
755 |
return SE.getAddExpr(Ops); |
| 756 |
} |
756 |
} |
| 757 |
return nullptr; |
757 |
return nullptr; |
| 758 |
} |
758 |
} |
| 759 |
|
759 |
|
| 760 |
// Check for a multiply operand that we can pull RHS out of. |
760 |
// Check for a multiply operand that we can pull RHS out of. |
| 761 |
if (const SCEVMulExpr *Mul = dyn_cast(LHS)) { |
761 |
if (const SCEVMulExpr *Mul = dyn_cast(LHS)) { |
| 762 |
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) { |
762 |
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) { |
| 763 |
// Handle special case C1*X*Y /s C2*X*Y. |
763 |
// Handle special case C1*X*Y /s C2*X*Y. |
| 764 |
if (const SCEVMulExpr *MulRHS = dyn_cast(RHS)) { |
764 |
if (const SCEVMulExpr *MulRHS = dyn_cast(RHS)) { |
| 765 |
if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) { |
765 |
if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) { |
| 766 |
const SCEVConstant *LC = dyn_cast(Mul->getOperand(0)); |
766 |
const SCEVConstant *LC = dyn_cast(Mul->getOperand(0)); |
| 767 |
const SCEVConstant *RC = |
767 |
const SCEVConstant *RC = |
| 768 |
dyn_cast(MulRHS->getOperand(0)); |
768 |
dyn_cast(MulRHS->getOperand(0)); |
| 769 |
if (LC && RC) { |
769 |
if (LC && RC) { |
| 770 |
SmallVector LOps(drop_begin(Mul->operands())); |
770 |
SmallVector LOps(drop_begin(Mul->operands())); |
| 771 |
SmallVector ROps(drop_begin(MulRHS->operands())); |
771 |
SmallVector ROps(drop_begin(MulRHS->operands())); |
| 772 |
if (LOps == ROps) |
772 |
if (LOps == ROps) |
| 773 |
return getExactSDiv(LC, RC, SE, IgnoreSignificantBits); |
773 |
return getExactSDiv(LC, RC, SE, IgnoreSignificantBits); |
| 774 |
} |
774 |
} |
| 775 |
} |
775 |
} |
| 776 |
} |
776 |
} |
| 777 |
|
777 |
|
| 778 |
SmallVector Ops; |
778 |
SmallVector Ops; |
| 779 |
bool Found = false; |
779 |
bool Found = false; |
| 780 |
for (const SCEV *S : Mul->operands()) { |
780 |
for (const SCEV *S : Mul->operands()) { |
| 781 |
if (!Found) |
781 |
if (!Found) |
| 782 |
if (const SCEV *Q = getExactSDiv(S, RHS, SE, |
782 |
if (const SCEV *Q = getExactSDiv(S, RHS, SE, |
| 783 |
IgnoreSignificantBits)) { |
783 |
IgnoreSignificantBits)) { |
| 784 |
S = Q; |
784 |
S = Q; |
| 785 |
Found = true; |
785 |
Found = true; |
| 786 |
} |
786 |
} |
| 787 |
Ops.push_back(S); |
787 |
Ops.push_back(S); |
| 788 |
} |
788 |
} |
| 789 |
return Found ? SE.getMulExpr(Ops) : nullptr; |
789 |
return Found ? SE.getMulExpr(Ops) : nullptr; |
| 790 |
} |
790 |
} |
| 791 |
return nullptr; |
791 |
return nullptr; |
| 792 |
} |
792 |
} |
| 793 |
|
793 |
|
| 794 |
// Otherwise we don't know. |
794 |
// Otherwise we don't know. |
| 795 |
return nullptr; |
795 |
return nullptr; |
| 796 |
} |
796 |
} |
| 797 |
|
797 |
|
| 798 |
/// If S involves the addition of a constant integer value, return that integer |
798 |
/// If S involves the addition of a constant integer value, return that integer |
| 799 |
/// value, and mutate S to point to a new SCEV with that value excluded. |
799 |
/// value, and mutate S to point to a new SCEV with that value excluded. |
| 800 |
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { |
800 |
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { |
| 801 |
if (const SCEVConstant *C = dyn_cast(S)) { |
801 |
if (const SCEVConstant *C = dyn_cast(S)) { |
| 802 |
if (C->getAPInt().getSignificantBits() <= 64) { |
802 |
if (C->getAPInt().getSignificantBits() <= 64) { |
| 803 |
S = SE.getConstant(C->getType(), 0); |
803 |
S = SE.getConstant(C->getType(), 0); |
| 804 |
return C->getValue()->getSExtValue(); |
804 |
return C->getValue()->getSExtValue(); |
| 805 |
} |
805 |
} |
| 806 |
} else if (const SCEVAddExpr *Add = dyn_cast(S)) { |
806 |
} else if (const SCEVAddExpr *Add = dyn_cast(S)) { |
| 807 |
SmallVector NewOps(Add->operands()); |
807 |
SmallVector NewOps(Add->operands()); |
| 808 |
int64_t Result = ExtractImmediate(NewOps.front(), SE); |
808 |
int64_t Result = ExtractImmediate(NewOps.front(), SE); |
| 809 |
if (Result != 0) |
809 |
if (Result != 0) |
| 810 |
S = SE.getAddExpr(NewOps); |
810 |
S = SE.getAddExpr(NewOps); |
| 811 |
return Result; |
811 |
return Result; |
| 812 |
} else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
812 |
} else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
| 813 |
SmallVector NewOps(AR->operands()); |
813 |
SmallVector NewOps(AR->operands()); |
| 814 |
int64_t Result = ExtractImmediate(NewOps.front(), SE); |
814 |
int64_t Result = ExtractImmediate(NewOps.front(), SE); |
| 815 |
if (Result != 0) |
815 |
if (Result != 0) |
| 816 |
S = SE.getAddRecExpr(NewOps, AR->getLoop(), |
816 |
S = SE.getAddRecExpr(NewOps, AR->getLoop(), |
| 817 |
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
817 |
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
| 818 |
SCEV::FlagAnyWrap); |
818 |
SCEV::FlagAnyWrap); |
| 819 |
return Result; |
819 |
return Result; |
| 820 |
} |
820 |
} |
| 821 |
return 0; |
821 |
return 0; |
| 822 |
} |
822 |
} |
| 823 |
|
823 |
|
| 824 |
/// If S involves the addition of a GlobalValue address, return that symbol, and |
824 |
/// If S involves the addition of a GlobalValue address, return that symbol, and |
| 825 |
/// mutate S to point to a new SCEV with that value excluded. |
825 |
/// mutate S to point to a new SCEV with that value excluded. |
| 826 |
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { |
826 |
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { |
| 827 |
if (const SCEVUnknown *U = dyn_cast(S)) { |
827 |
if (const SCEVUnknown *U = dyn_cast(S)) { |
| 828 |
if (GlobalValue *GV = dyn_cast(U->getValue())) { |
828 |
if (GlobalValue *GV = dyn_cast(U->getValue())) { |
| 829 |
S = SE.getConstant(GV->getType(), 0); |
829 |
S = SE.getConstant(GV->getType(), 0); |
| 830 |
return GV; |
830 |
return GV; |
| 831 |
} |
831 |
} |
| 832 |
} else if (const SCEVAddExpr *Add = dyn_cast(S)) { |
832 |
} else if (const SCEVAddExpr *Add = dyn_cast(S)) { |
| 833 |
SmallVector NewOps(Add->operands()); |
833 |
SmallVector NewOps(Add->operands()); |
| 834 |
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); |
834 |
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); |
| 835 |
if (Result) |
835 |
if (Result) |
| 836 |
S = SE.getAddExpr(NewOps); |
836 |
S = SE.getAddExpr(NewOps); |
| 837 |
return Result; |
837 |
return Result; |
| 838 |
} else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
838 |
} else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
| 839 |
SmallVector NewOps(AR->operands()); |
839 |
SmallVector NewOps(AR->operands()); |
| 840 |
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); |
840 |
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); |
| 841 |
if (Result) |
841 |
if (Result) |
| 842 |
S = SE.getAddRecExpr(NewOps, AR->getLoop(), |
842 |
S = SE.getAddRecExpr(NewOps, AR->getLoop(), |
| 843 |
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
843 |
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
| 844 |
SCEV::FlagAnyWrap); |
844 |
SCEV::FlagAnyWrap); |
| 845 |
return Result; |
845 |
return Result; |
| 846 |
} |
846 |
} |
| 847 |
return nullptr; |
847 |
return nullptr; |
| 848 |
} |
848 |
} |
| 849 |
|
849 |
|
| 850 |
/// Returns true if the specified instruction is using the specified value as an |
850 |
/// Returns true if the specified instruction is using the specified value as an |
| 851 |
/// address. |
851 |
/// address. |
| 852 |
static bool isAddressUse(const TargetTransformInfo &TTI, |
852 |
static bool isAddressUse(const TargetTransformInfo &TTI, |
| 853 |
Instruction *Inst, Value *OperandVal) { |
853 |
Instruction *Inst, Value *OperandVal) { |
| 854 |
bool isAddress = isa(Inst); |
854 |
bool isAddress = isa(Inst); |
| 855 |
if (StoreInst *SI = dyn_cast(Inst)) { |
855 |
if (StoreInst *SI = dyn_cast(Inst)) { |
| 856 |
if (SI->getPointerOperand() == OperandVal) |
856 |
if (SI->getPointerOperand() == OperandVal) |
| 857 |
isAddress = true; |
857 |
isAddress = true; |
| 858 |
} else if (IntrinsicInst *II = dyn_cast(Inst)) { |
858 |
} else if (IntrinsicInst *II = dyn_cast(Inst)) { |
| 859 |
// Addressing modes can also be folded into prefetches and a variety |
859 |
// Addressing modes can also be folded into prefetches and a variety |
| 860 |
// of intrinsics. |
860 |
// of intrinsics. |
| 861 |
switch (II->getIntrinsicID()) { |
861 |
switch (II->getIntrinsicID()) { |
| 862 |
case Intrinsic::memset: |
862 |
case Intrinsic::memset: |
| 863 |
case Intrinsic::prefetch: |
863 |
case Intrinsic::prefetch: |
| 864 |
case Intrinsic::masked_load: |
864 |
case Intrinsic::masked_load: |
| 865 |
if (II->getArgOperand(0) == OperandVal) |
865 |
if (II->getArgOperand(0) == OperandVal) |
| 866 |
isAddress = true; |
866 |
isAddress = true; |
| 867 |
break; |
867 |
break; |
| 868 |
case Intrinsic::masked_store: |
868 |
case Intrinsic::masked_store: |
| 869 |
if (II->getArgOperand(1) == OperandVal) |
869 |
if (II->getArgOperand(1) == OperandVal) |
| 870 |
isAddress = true; |
870 |
isAddress = true; |
| 871 |
break; |
871 |
break; |
| 872 |
case Intrinsic::memmove: |
872 |
case Intrinsic::memmove: |
| 873 |
case Intrinsic::memcpy: |
873 |
case Intrinsic::memcpy: |
| 874 |
if (II->getArgOperand(0) == OperandVal || |
874 |
if (II->getArgOperand(0) == OperandVal || |
| 875 |
II->getArgOperand(1) == OperandVal) |
875 |
II->getArgOperand(1) == OperandVal) |
| 876 |
isAddress = true; |
876 |
isAddress = true; |
| 877 |
break; |
877 |
break; |
| 878 |
default: { |
878 |
default: { |
| 879 |
MemIntrinsicInfo IntrInfo; |
879 |
MemIntrinsicInfo IntrInfo; |
| 880 |
if (TTI.getTgtMemIntrinsic(II, IntrInfo)) { |
880 |
if (TTI.getTgtMemIntrinsic(II, IntrInfo)) { |
| 881 |
if (IntrInfo.PtrVal == OperandVal) |
881 |
if (IntrInfo.PtrVal == OperandVal) |
| 882 |
isAddress = true; |
882 |
isAddress = true; |
| 883 |
} |
883 |
} |
| 884 |
} |
884 |
} |
| 885 |
} |
885 |
} |
| 886 |
} else if (AtomicRMWInst *RMW = dyn_cast(Inst)) { |
886 |
} else if (AtomicRMWInst *RMW = dyn_cast(Inst)) { |
| 887 |
if (RMW->getPointerOperand() == OperandVal) |
887 |
if (RMW->getPointerOperand() == OperandVal) |
| 888 |
isAddress = true; |
888 |
isAddress = true; |
| 889 |
} else if (AtomicCmpXchgInst *CmpX = dyn_cast(Inst)) { |
889 |
} else if (AtomicCmpXchgInst *CmpX = dyn_cast(Inst)) { |
| 890 |
if (CmpX->getPointerOperand() == OperandVal) |
890 |
if (CmpX->getPointerOperand() == OperandVal) |
| 891 |
isAddress = true; |
891 |
isAddress = true; |
| 892 |
} |
892 |
} |
| 893 |
return isAddress; |
893 |
return isAddress; |
| 894 |
} |
894 |
} |
| 895 |
|
895 |
|
| 896 |
/// Return the type of the memory being accessed. |
896 |
/// Return the type of the memory being accessed. |
| 897 |
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, |
897 |
static MemAccessTy getAccessType(const TargetTransformInfo &TTI, |
| 898 |
Instruction *Inst, Value *OperandVal) { |
898 |
Instruction *Inst, Value *OperandVal) { |
| 899 |
MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext()); |
899 |
MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext()); |
| 900 |
|
900 |
|
| 901 |
// First get the type of memory being accessed. |
901 |
// First get the type of memory being accessed. |
| 902 |
if (Type *Ty = Inst->getAccessType()) |
902 |
if (Type *Ty = Inst->getAccessType()) |
| 903 |
AccessTy.MemTy = Ty; |
903 |
AccessTy.MemTy = Ty; |
| 904 |
|
904 |
|
| 905 |
// Then get the pointer address space. |
905 |
// Then get the pointer address space. |
| 906 |
if (const StoreInst *SI = dyn_cast(Inst)) { |
906 |
if (const StoreInst *SI = dyn_cast(Inst)) { |
| 907 |
AccessTy.AddrSpace = SI->getPointerAddressSpace(); |
907 |
AccessTy.AddrSpace = SI->getPointerAddressSpace(); |
| 908 |
} else if (const LoadInst *LI = dyn_cast(Inst)) { |
908 |
} else if (const LoadInst *LI = dyn_cast(Inst)) { |
| 909 |
AccessTy.AddrSpace = LI->getPointerAddressSpace(); |
909 |
AccessTy.AddrSpace = LI->getPointerAddressSpace(); |
| 910 |
} else if (const AtomicRMWInst *RMW = dyn_cast(Inst)) { |
910 |
} else if (const AtomicRMWInst *RMW = dyn_cast(Inst)) { |
| 911 |
AccessTy.AddrSpace = RMW->getPointerAddressSpace(); |
911 |
AccessTy.AddrSpace = RMW->getPointerAddressSpace(); |
| 912 |
} else if (const AtomicCmpXchgInst *CmpX = dyn_cast(Inst)) { |
912 |
} else if (const AtomicCmpXchgInst *CmpX = dyn_cast(Inst)) { |
| 913 |
AccessTy.AddrSpace = CmpX->getPointerAddressSpace(); |
913 |
AccessTy.AddrSpace = CmpX->getPointerAddressSpace(); |
| 914 |
} else if (IntrinsicInst *II = dyn_cast(Inst)) { |
914 |
} else if (IntrinsicInst *II = dyn_cast(Inst)) { |
| 915 |
switch (II->getIntrinsicID()) { |
915 |
switch (II->getIntrinsicID()) { |
| 916 |
case Intrinsic::prefetch: |
916 |
case Intrinsic::prefetch: |
| 917 |
case Intrinsic::memset: |
917 |
case Intrinsic::memset: |
| 918 |
AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace(); |
918 |
AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace(); |
| 919 |
AccessTy.MemTy = OperandVal->getType(); |
919 |
AccessTy.MemTy = OperandVal->getType(); |
| 920 |
break; |
920 |
break; |
| 921 |
case Intrinsic::memmove: |
921 |
case Intrinsic::memmove: |
| 922 |
case Intrinsic::memcpy: |
922 |
case Intrinsic::memcpy: |
| 923 |
AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace(); |
923 |
AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace(); |
| 924 |
AccessTy.MemTy = OperandVal->getType(); |
924 |
AccessTy.MemTy = OperandVal->getType(); |
| 925 |
break; |
925 |
break; |
| 926 |
case Intrinsic::masked_load: |
926 |
case Intrinsic::masked_load: |
| 927 |
AccessTy.AddrSpace = |
927 |
AccessTy.AddrSpace = |
| 928 |
II->getArgOperand(0)->getType()->getPointerAddressSpace(); |
928 |
II->getArgOperand(0)->getType()->getPointerAddressSpace(); |
| 929 |
break; |
929 |
break; |
| 930 |
case Intrinsic::masked_store: |
930 |
case Intrinsic::masked_store: |
| 931 |
AccessTy.AddrSpace = |
931 |
AccessTy.AddrSpace = |
| 932 |
II->getArgOperand(1)->getType()->getPointerAddressSpace(); |
932 |
II->getArgOperand(1)->getType()->getPointerAddressSpace(); |
| 933 |
break; |
933 |
break; |
| 934 |
default: { |
934 |
default: { |
| 935 |
MemIntrinsicInfo IntrInfo; |
935 |
MemIntrinsicInfo IntrInfo; |
| 936 |
if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) { |
936 |
if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) { |
| 937 |
AccessTy.AddrSpace |
937 |
AccessTy.AddrSpace |
| 938 |
= IntrInfo.PtrVal->getType()->getPointerAddressSpace(); |
938 |
= IntrInfo.PtrVal->getType()->getPointerAddressSpace(); |
| 939 |
} |
939 |
} |
| 940 |
|
940 |
|
| 941 |
break; |
941 |
break; |
| 942 |
} |
942 |
} |
| 943 |
} |
943 |
} |
| 944 |
} |
944 |
} |
| 945 |
|
945 |
|
| 946 |
// All pointers have the same requirements, so canonicalize them to an |
946 |
// All pointers have the same requirements, so canonicalize them to an |
| 947 |
// arbitrary pointer type to minimize variation. |
947 |
// arbitrary pointer type to minimize variation. |
| 948 |
if (PointerType *PTy = dyn_cast(AccessTy.MemTy)) |
948 |
if (PointerType *PTy = dyn_cast(AccessTy.MemTy)) |
| 949 |
AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), |
949 |
AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), |
| 950 |
PTy->getAddressSpace()); |
950 |
PTy->getAddressSpace()); |
| 951 |
|
951 |
|
| 952 |
return AccessTy; |
952 |
return AccessTy; |
| 953 |
} |
953 |
} |
| 954 |
|
954 |
|
| 955 |
/// Return true if this AddRec is already a phi in its loop. |
955 |
/// Return true if this AddRec is already a phi in its loop. |
| 956 |
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { |
956 |
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { |
| 957 |
for (PHINode &PN : AR->getLoop()->getHeader()->phis()) { |
957 |
for (PHINode &PN : AR->getLoop()->getHeader()->phis()) { |
| 958 |
if (SE.isSCEVable(PN.getType()) && |
958 |
if (SE.isSCEVable(PN.getType()) && |
| 959 |
(SE.getEffectiveSCEVType(PN.getType()) == |
959 |
(SE.getEffectiveSCEVType(PN.getType()) == |
| 960 |
SE.getEffectiveSCEVType(AR->getType())) && |
960 |
SE.getEffectiveSCEVType(AR->getType())) && |
| 961 |
SE.getSCEV(&PN) == AR) |
961 |
SE.getSCEV(&PN) == AR) |
| 962 |
return true; |
962 |
return true; |
| 963 |
} |
963 |
} |
| 964 |
return false; |
964 |
return false; |
| 965 |
} |
965 |
} |
| 966 |
|
966 |
|
| 967 |
/// Check if expanding this expression is likely to incur significant cost. This |
967 |
/// Check if expanding this expression is likely to incur significant cost. This |
| 968 |
/// is tricky because SCEV doesn't track which expressions are actually computed |
968 |
/// is tricky because SCEV doesn't track which expressions are actually computed |
| 969 |
/// by the current IR. |
969 |
/// by the current IR. |
| 970 |
/// |
970 |
/// |
| 971 |
/// We currently allow expansion of IV increments that involve adds, |
971 |
/// We currently allow expansion of IV increments that involve adds, |
| 972 |
/// multiplication by constants, and AddRecs from existing phis. |
972 |
/// multiplication by constants, and AddRecs from existing phis. |
| 973 |
/// |
973 |
/// |
| 974 |
/// TODO: Allow UDivExpr if we can find an existing IV increment that is an |
974 |
/// TODO: Allow UDivExpr if we can find an existing IV increment that is an |
| 975 |
/// obvious multiple of the UDivExpr. |
975 |
/// obvious multiple of the UDivExpr. |
| 976 |
static bool isHighCostExpansion(const SCEV *S, |
976 |
static bool isHighCostExpansion(const SCEV *S, |
| 977 |
SmallPtrSetImpl &Processed, |
977 |
SmallPtrSetImpl &Processed, |
| 978 |
ScalarEvolution &SE) { |
978 |
ScalarEvolution &SE) { |
| 979 |
// Zero/One operand expressions |
979 |
// Zero/One operand expressions |
| 980 |
switch (S->getSCEVType()) { |
980 |
switch (S->getSCEVType()) { |
| 981 |
case scUnknown: |
981 |
case scUnknown: |
| 982 |
case scConstant: |
982 |
case scConstant: |
| 983 |
case scVScale: |
983 |
case scVScale: |
| 984 |
return false; |
984 |
return false; |
| 985 |
case scTruncate: |
985 |
case scTruncate: |
| 986 |
return isHighCostExpansion(cast(S)->getOperand(), |
986 |
return isHighCostExpansion(cast(S)->getOperand(), |
| 987 |
Processed, SE); |
987 |
Processed, SE); |
| 988 |
case scZeroExtend: |
988 |
case scZeroExtend: |
| 989 |
return isHighCostExpansion(cast(S)->getOperand(), |
989 |
return isHighCostExpansion(cast(S)->getOperand(), |
| 990 |
Processed, SE); |
990 |
Processed, SE); |
| 991 |
case scSignExtend: |
991 |
case scSignExtend: |
| 992 |
return isHighCostExpansion(cast(S)->getOperand(), |
992 |
return isHighCostExpansion(cast(S)->getOperand(), |
| 993 |
Processed, SE); |
993 |
Processed, SE); |
| 994 |
default: |
994 |
default: |
| 995 |
break; |
995 |
break; |
| 996 |
} |
996 |
} |
| 997 |
|
997 |
|
| 998 |
if (!Processed.insert(S).second) |
998 |
if (!Processed.insert(S).second) |
| 999 |
return false; |
999 |
return false; |
| 1000 |
|
1000 |
|
| 1001 |
if (const SCEVAddExpr *Add = dyn_cast(S)) { |
1001 |
if (const SCEVAddExpr *Add = dyn_cast(S)) { |
| 1002 |
for (const SCEV *S : Add->operands()) { |
1002 |
for (const SCEV *S : Add->operands()) { |
| 1003 |
if (isHighCostExpansion(S, Processed, SE)) |
1003 |
if (isHighCostExpansion(S, Processed, SE)) |
| 1004 |
return true; |
1004 |
return true; |
| 1005 |
} |
1005 |
} |
| 1006 |
return false; |
1006 |
return false; |
| 1007 |
} |
1007 |
} |
| 1008 |
|
1008 |
|
| 1009 |
if (const SCEVMulExpr *Mul = dyn_cast(S)) { |
1009 |
if (const SCEVMulExpr *Mul = dyn_cast(S)) { |
| 1010 |
if (Mul->getNumOperands() == 2) { |
1010 |
if (Mul->getNumOperands() == 2) { |
| 1011 |
// Multiplication by a constant is ok |
1011 |
// Multiplication by a constant is ok |
| 1012 |
if (isa(Mul->getOperand(0))) |
1012 |
if (isa(Mul->getOperand(0))) |
| 1013 |
return isHighCostExpansion(Mul->getOperand(1), Processed, SE); |
1013 |
return isHighCostExpansion(Mul->getOperand(1), Processed, SE); |
| 1014 |
|
1014 |
|
| 1015 |
// If we have the value of one operand, check if an existing |
1015 |
// If we have the value of one operand, check if an existing |
| 1016 |
// multiplication already generates this expression. |
1016 |
// multiplication already generates this expression. |
| 1017 |
if (const SCEVUnknown *U = dyn_cast(Mul->getOperand(1))) { |
1017 |
if (const SCEVUnknown *U = dyn_cast(Mul->getOperand(1))) { |
| 1018 |
Value *UVal = U->getValue(); |
1018 |
Value *UVal = U->getValue(); |
| 1019 |
for (User *UR : UVal->users()) { |
1019 |
for (User *UR : UVal->users()) { |
| 1020 |
// If U is a constant, it may be used by a ConstantExpr. |
1020 |
// If U is a constant, it may be used by a ConstantExpr. |
| 1021 |
Instruction *UI = dyn_cast(UR); |
1021 |
Instruction *UI = dyn_cast(UR); |
| 1022 |
if (UI && UI->getOpcode() == Instruction::Mul && |
1022 |
if (UI && UI->getOpcode() == Instruction::Mul && |
| 1023 |
SE.isSCEVable(UI->getType())) { |
1023 |
SE.isSCEVable(UI->getType())) { |
| 1024 |
return SE.getSCEV(UI) == Mul; |
1024 |
return SE.getSCEV(UI) == Mul; |
| 1025 |
} |
1025 |
} |
| 1026 |
} |
1026 |
} |
| 1027 |
} |
1027 |
} |
| 1028 |
} |
1028 |
} |
| 1029 |
} |
1029 |
} |
| 1030 |
|
1030 |
|
| 1031 |
if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
1031 |
if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
| 1032 |
if (isExistingPhi(AR, SE)) |
1032 |
if (isExistingPhi(AR, SE)) |
| 1033 |
return false; |
1033 |
return false; |
| 1034 |
} |
1034 |
} |
| 1035 |
|
1035 |
|
| 1036 |
// Fow now, consider any other type of expression (div/mul/min/max) high cost. |
1036 |
// Fow now, consider any other type of expression (div/mul/min/max) high cost. |
| 1037 |
return true; |
1037 |
return true; |
| 1038 |
} |
1038 |
} |
| 1039 |
|
1039 |
|
| 1040 |
namespace { |
1040 |
namespace { |
| 1041 |
|
1041 |
|
| 1042 |
class LSRUse; |
1042 |
class LSRUse; |
| 1043 |
|
1043 |
|
| 1044 |
} // end anonymous namespace |
1044 |
} // end anonymous namespace |
| 1045 |
|
1045 |
|
| 1046 |
/// Check if the addressing mode defined by \p F is completely |
1046 |
/// Check if the addressing mode defined by \p F is completely |
| 1047 |
/// folded in \p LU at isel time. |
1047 |
/// folded in \p LU at isel time. |
| 1048 |
/// This includes address-mode folding and special icmp tricks. |
1048 |
/// This includes address-mode folding and special icmp tricks. |
| 1049 |
/// This function returns true if \p LU can accommodate what \p F |
1049 |
/// This function returns true if \p LU can accommodate what \p F |
| 1050 |
/// defines and up to 1 base + 1 scaled + offset. |
1050 |
/// defines and up to 1 base + 1 scaled + offset. |
| 1051 |
/// In other words, if \p F has several base registers, this function may |
1051 |
/// In other words, if \p F has several base registers, this function may |
| 1052 |
/// still return true. Therefore, users still need to account for |
1052 |
/// still return true. Therefore, users still need to account for |
| 1053 |
/// additional base registers and/or unfolded offsets to derive an |
1053 |
/// additional base registers and/or unfolded offsets to derive an |
| 1054 |
/// accurate cost model. |
1054 |
/// accurate cost model. |
| 1055 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
1055 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
| 1056 |
const LSRUse &LU, const Formula &F); |
1056 |
const LSRUse &LU, const Formula &F); |
| 1057 |
|
1057 |
|
| 1058 |
// Get the cost of the scaling factor used in F for LU. |
1058 |
// Get the cost of the scaling factor used in F for LU. |
| 1059 |
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, |
1059 |
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, |
| 1060 |
const LSRUse &LU, const Formula &F, |
1060 |
const LSRUse &LU, const Formula &F, |
| 1061 |
const Loop &L); |
1061 |
const Loop &L); |
| 1062 |
|
1062 |
|
| 1063 |
namespace { |
1063 |
namespace { |
| 1064 |
|
1064 |
|
| 1065 |
/// This class is used to measure and compare candidate formulae. |
1065 |
/// This class is used to measure and compare candidate formulae. |
| 1066 |
class Cost { |
1066 |
class Cost { |
| 1067 |
const Loop *L = nullptr; |
1067 |
const Loop *L = nullptr; |
| 1068 |
ScalarEvolution *SE = nullptr; |
1068 |
ScalarEvolution *SE = nullptr; |
| 1069 |
const TargetTransformInfo *TTI = nullptr; |
1069 |
const TargetTransformInfo *TTI = nullptr; |
| 1070 |
TargetTransformInfo::LSRCost C; |
1070 |
TargetTransformInfo::LSRCost C; |
| 1071 |
TTI::AddressingModeKind AMK = TTI::AMK_None; |
1071 |
TTI::AddressingModeKind AMK = TTI::AMK_None; |
| 1072 |
|
1072 |
|
| 1073 |
public: |
1073 |
public: |
| 1074 |
Cost() = delete; |
1074 |
Cost() = delete; |
| 1075 |
Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, |
1075 |
Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, |
| 1076 |
TTI::AddressingModeKind AMK) : |
1076 |
TTI::AddressingModeKind AMK) : |
| 1077 |
L(L), SE(&SE), TTI(&TTI), AMK(AMK) { |
1077 |
L(L), SE(&SE), TTI(&TTI), AMK(AMK) { |
| 1078 |
C.Insns = 0; |
1078 |
C.Insns = 0; |
| 1079 |
C.NumRegs = 0; |
1079 |
C.NumRegs = 0; |
| 1080 |
C.AddRecCost = 0; |
1080 |
C.AddRecCost = 0; |
| 1081 |
C.NumIVMuls = 0; |
1081 |
C.NumIVMuls = 0; |
| 1082 |
C.NumBaseAdds = 0; |
1082 |
C.NumBaseAdds = 0; |
| 1083 |
C.ImmCost = 0; |
1083 |
C.ImmCost = 0; |
| 1084 |
C.SetupCost = 0; |
1084 |
C.SetupCost = 0; |
| 1085 |
C.ScaleCost = 0; |
1085 |
C.ScaleCost = 0; |
| 1086 |
} |
1086 |
} |
| 1087 |
|
1087 |
|
| 1088 |
bool isLess(const Cost &Other) const; |
1088 |
bool isLess(const Cost &Other) const; |
| 1089 |
|
1089 |
|
| 1090 |
void Lose(); |
1090 |
void Lose(); |
| 1091 |
|
1091 |
|
| 1092 |
#ifndef NDEBUG |
1092 |
#ifndef NDEBUG |
| 1093 |
// Once any of the metrics loses, they must all remain losers. |
1093 |
// Once any of the metrics loses, they must all remain losers. |
| 1094 |
bool isValid() { |
1094 |
bool isValid() { |
| 1095 |
return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds |
1095 |
return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds |
| 1096 |
| C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) |
1096 |
| C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) |
| 1097 |
|| ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds |
1097 |
|| ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds |
| 1098 |
& C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); |
1098 |
& C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); |
| 1099 |
} |
1099 |
} |
| 1100 |
#endif |
1100 |
#endif |
| 1101 |
|
1101 |
|
| 1102 |
bool isLoser() { |
1102 |
bool isLoser() { |
| 1103 |
assert(isValid() && "invalid cost"); |
1103 |
assert(isValid() && "invalid cost"); |
| 1104 |
return C.NumRegs == ~0u; |
1104 |
return C.NumRegs == ~0u; |
| 1105 |
} |
1105 |
} |
| 1106 |
|
1106 |
|
| 1107 |
void RateFormula(const Formula &F, |
1107 |
void RateFormula(const Formula &F, |
| 1108 |
SmallPtrSetImpl &Regs, |
1108 |
SmallPtrSetImpl &Regs, |
| 1109 |
const DenseSet &VisitedRegs, |
1109 |
const DenseSet &VisitedRegs, |
| 1110 |
const LSRUse &LU, |
1110 |
const LSRUse &LU, |
| 1111 |
SmallPtrSetImpl *LoserRegs = nullptr); |
1111 |
SmallPtrSetImpl *LoserRegs = nullptr); |
| 1112 |
|
1112 |
|
| 1113 |
void print(raw_ostream &OS) const; |
1113 |
void print(raw_ostream &OS) const; |
| 1114 |
void dump() const; |
1114 |
void dump() const; |
| 1115 |
|
1115 |
|
| 1116 |
private: |
1116 |
private: |
| 1117 |
void RateRegister(const Formula &F, const SCEV *Reg, |
1117 |
void RateRegister(const Formula &F, const SCEV *Reg, |
| 1118 |
SmallPtrSetImpl &Regs); |
1118 |
SmallPtrSetImpl &Regs); |
| 1119 |
void RatePrimaryRegister(const Formula &F, const SCEV *Reg, |
1119 |
void RatePrimaryRegister(const Formula &F, const SCEV *Reg, |
| 1120 |
SmallPtrSetImpl &Regs, |
1120 |
SmallPtrSetImpl &Regs, |
| 1121 |
SmallPtrSetImpl *LoserRegs); |
1121 |
SmallPtrSetImpl *LoserRegs); |
| 1122 |
}; |
1122 |
}; |
| 1123 |
|
1123 |
|
| 1124 |
/// An operand value in an instruction which is to be replaced with some |
1124 |
/// An operand value in an instruction which is to be replaced with some |
| 1125 |
/// equivalent, possibly strength-reduced, replacement. |
1125 |
/// equivalent, possibly strength-reduced, replacement. |
| 1126 |
struct LSRFixup { |
1126 |
struct LSRFixup { |
| 1127 |
/// The instruction which will be updated. |
1127 |
/// The instruction which will be updated. |
| 1128 |
Instruction *UserInst = nullptr; |
1128 |
Instruction *UserInst = nullptr; |
| 1129 |
|
1129 |
|
| 1130 |
/// The operand of the instruction which will be replaced. The operand may be |
1130 |
/// The operand of the instruction which will be replaced. The operand may be |
| 1131 |
/// used more than once; every instance will be replaced. |
1131 |
/// used more than once; every instance will be replaced. |
| 1132 |
Value *OperandValToReplace = nullptr; |
1132 |
Value *OperandValToReplace = nullptr; |
| 1133 |
|
1133 |
|
| 1134 |
/// If this user is to use the post-incremented value of an induction |
1134 |
/// If this user is to use the post-incremented value of an induction |
| 1135 |
/// variable, this set is non-empty and holds the loops associated with the |
1135 |
/// variable, this set is non-empty and holds the loops associated with the |
| 1136 |
/// induction variable. |
1136 |
/// induction variable. |
| 1137 |
PostIncLoopSet PostIncLoops; |
1137 |
PostIncLoopSet PostIncLoops; |
| 1138 |
|
1138 |
|
| 1139 |
/// A constant offset to be added to the LSRUse expression. This allows |
1139 |
/// A constant offset to be added to the LSRUse expression. This allows |
| 1140 |
/// multiple fixups to share the same LSRUse with different offsets, for |
1140 |
/// multiple fixups to share the same LSRUse with different offsets, for |
| 1141 |
/// example in an unrolled loop. |
1141 |
/// example in an unrolled loop. |
| 1142 |
int64_t Offset = 0; |
1142 |
int64_t Offset = 0; |
| 1143 |
|
1143 |
|
| 1144 |
LSRFixup() = default; |
1144 |
LSRFixup() = default; |
| 1145 |
|
1145 |
|
| 1146 |
bool isUseFullyOutsideLoop(const Loop *L) const; |
1146 |
bool isUseFullyOutsideLoop(const Loop *L) const; |
| 1147 |
|
1147 |
|
| 1148 |
void print(raw_ostream &OS) const; |
1148 |
void print(raw_ostream &OS) const; |
| 1149 |
void dump() const; |
1149 |
void dump() const; |
| 1150 |
}; |
1150 |
}; |
| 1151 |
|
1151 |
|
| 1152 |
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted |
1152 |
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted |
| 1153 |
/// SmallVectors of const SCEV*. |
1153 |
/// SmallVectors of const SCEV*. |
| 1154 |
struct UniquifierDenseMapInfo { |
1154 |
struct UniquifierDenseMapInfo { |
| 1155 |
static SmallVector getEmptyKey() { |
1155 |
static SmallVector getEmptyKey() { |
| 1156 |
SmallVector V; |
1156 |
SmallVector V; |
| 1157 |
V.push_back(reinterpret_cast(-1)); |
1157 |
V.push_back(reinterpret_cast(-1)); |
| 1158 |
return V; |
1158 |
return V; |
| 1159 |
} |
1159 |
} |
| 1160 |
|
1160 |
|
| 1161 |
static SmallVector getTombstoneKey() { |
1161 |
static SmallVector getTombstoneKey() { |
| 1162 |
SmallVector V; |
1162 |
SmallVector V; |
| 1163 |
V.push_back(reinterpret_cast(-2)); |
1163 |
V.push_back(reinterpret_cast(-2)); |
| 1164 |
return V; |
1164 |
return V; |
| 1165 |
} |
1165 |
} |
| 1166 |
|
1166 |
|
| 1167 |
static unsigned getHashValue(const SmallVector &V) { |
1167 |
static unsigned getHashValue(const SmallVector &V) { |
| 1168 |
return static_cast(hash_combine_range(V.begin(), V.end())); |
1168 |
return static_cast(hash_combine_range(V.begin(), V.end())); |
| 1169 |
} |
1169 |
} |
| 1170 |
|
1170 |
|
| 1171 |
static bool isEqual(const SmallVector &LHS, |
1171 |
static bool isEqual(const SmallVector &LHS, |
| 1172 |
const SmallVector &RHS) { |
1172 |
const SmallVector &RHS) { |
| 1173 |
return LHS == RHS; |
1173 |
return LHS == RHS; |
| 1174 |
} |
1174 |
} |
| 1175 |
}; |
1175 |
}; |
| 1176 |
|
1176 |
|
| 1177 |
/// This class holds the state that LSR keeps for each use in IVUsers, as well |
1177 |
/// This class holds the state that LSR keeps for each use in IVUsers, as well |
| 1178 |
/// as uses invented by LSR itself. It includes information about what kinds of |
1178 |
/// as uses invented by LSR itself. It includes information about what kinds of |
| 1179 |
/// things can be folded into the user, information about the user itself, and |
1179 |
/// things can be folded into the user, information about the user itself, and |
| 1180 |
/// information about how the use may be satisfied. TODO: Represent multiple |
1180 |
/// information about how the use may be satisfied. TODO: Represent multiple |
| 1181 |
/// users of the same expression in common? |
1181 |
/// users of the same expression in common? |
| 1182 |
class LSRUse { |
1182 |
class LSRUse { |
| 1183 |
DenseSet, UniquifierDenseMapInfo> Uniquifier; |
1183 |
DenseSet, UniquifierDenseMapInfo> Uniquifier; |
| 1184 |
|
1184 |
|
| 1185 |
public: |
1185 |
public: |
| 1186 |
/// An enum for a kind of use, indicating what types of scaled and immediate |
1186 |
/// An enum for a kind of use, indicating what types of scaled and immediate |
| 1187 |
/// operands it might support. |
1187 |
/// operands it might support. |
| 1188 |
enum KindType { |
1188 |
enum KindType { |
| 1189 |
Basic, ///< A normal use, with no folding. |
1189 |
Basic, ///< A normal use, with no folding. |
| 1190 |
Special, ///< A special case of basic, allowing -1 scales. |
1190 |
Special, ///< A special case of basic, allowing -1 scales. |
| 1191 |
Address, ///< An address use; folding according to TargetLowering |
1191 |
Address, ///< An address use; folding according to TargetLowering |
| 1192 |
ICmpZero ///< An equality icmp with both operands folded into one. |
1192 |
ICmpZero ///< An equality icmp with both operands folded into one. |
| 1193 |
// TODO: Add a generic icmp too? |
1193 |
// TODO: Add a generic icmp too? |
| 1194 |
}; |
1194 |
}; |
| 1195 |
|
1195 |
|
| 1196 |
using SCEVUseKindPair = PointerIntPair; |
1196 |
using SCEVUseKindPair = PointerIntPair; |
| 1197 |
|
1197 |
|
| 1198 |
KindType Kind; |
1198 |
KindType Kind; |
| 1199 |
MemAccessTy AccessTy; |
1199 |
MemAccessTy AccessTy; |
| 1200 |
|
1200 |
|
| 1201 |
/// The list of operands which are to be replaced. |
1201 |
/// The list of operands which are to be replaced. |
| 1202 |
SmallVector Fixups; |
1202 |
SmallVector Fixups; |
| 1203 |
|
1203 |
|
| 1204 |
/// Keep track of the min and max offsets of the fixups. |
1204 |
/// Keep track of the min and max offsets of the fixups. |
| 1205 |
int64_t MinOffset = std::numeric_limits::max(); |
1205 |
int64_t MinOffset = std::numeric_limits::max(); |
| 1206 |
int64_t MaxOffset = std::numeric_limits::min(); |
1206 |
int64_t MaxOffset = std::numeric_limits::min(); |
| 1207 |
|
1207 |
|
| 1208 |
/// This records whether all of the fixups using this LSRUse are outside of |
1208 |
/// This records whether all of the fixups using this LSRUse are outside of |
| 1209 |
/// the loop, in which case some special-case heuristics may be used. |
1209 |
/// the loop, in which case some special-case heuristics may be used. |
| 1210 |
bool AllFixupsOutsideLoop = true; |
1210 |
bool AllFixupsOutsideLoop = true; |
| 1211 |
|
1211 |
|
| 1212 |
/// RigidFormula is set to true to guarantee that this use will be associated |
1212 |
/// RigidFormula is set to true to guarantee that this use will be associated |
| 1213 |
/// with a single formula--the one that initially matched. Some SCEV |
1213 |
/// with a single formula--the one that initially matched. Some SCEV |
| 1214 |
/// expressions cannot be expanded. This allows LSR to consider the registers |
1214 |
/// expressions cannot be expanded. This allows LSR to consider the registers |
| 1215 |
/// used by those expressions without the need to expand them later after |
1215 |
/// used by those expressions without the need to expand them later after |
| 1216 |
/// changing the formula. |
1216 |
/// changing the formula. |
| 1217 |
bool RigidFormula = false; |
1217 |
bool RigidFormula = false; |
| 1218 |
|
1218 |
|
| 1219 |
/// This records the widest use type for any fixup using this |
1219 |
/// This records the widest use type for any fixup using this |
| 1220 |
/// LSRUse. FindUseWithSimilarFormula can't consider uses with different max |
1220 |
/// LSRUse. FindUseWithSimilarFormula can't consider uses with different max |
| 1221 |
/// fixup widths to be equivalent, because the narrower one may be relying on |
1221 |
/// fixup widths to be equivalent, because the narrower one may be relying on |
| 1222 |
/// the implicit truncation to truncate away bogus bits. |
1222 |
/// the implicit truncation to truncate away bogus bits. |
| 1223 |
Type *WidestFixupType = nullptr; |
1223 |
Type *WidestFixupType = nullptr; |
| 1224 |
|
1224 |
|
| 1225 |
/// A list of ways to build a value that can satisfy this user. After the |
1225 |
/// A list of ways to build a value that can satisfy this user. After the |
| 1226 |
/// list is populated, one of these is selected heuristically and used to |
1226 |
/// list is populated, one of these is selected heuristically and used to |
| 1227 |
/// formulate a replacement for OperandValToReplace in UserInst. |
1227 |
/// formulate a replacement for OperandValToReplace in UserInst. |
| 1228 |
SmallVector Formulae; |
1228 |
SmallVector Formulae; |
| 1229 |
|
1229 |
|
| 1230 |
/// The set of register candidates used by all formulae in this LSRUse. |
1230 |
/// The set of register candidates used by all formulae in this LSRUse. |
| 1231 |
SmallPtrSet Regs; |
1231 |
SmallPtrSet Regs; |
| 1232 |
|
1232 |
|
| 1233 |
LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {} |
1233 |
LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {} |
| 1234 |
|
1234 |
|
| 1235 |
LSRFixup &getNewFixup() { |
1235 |
LSRFixup &getNewFixup() { |
| 1236 |
Fixups.push_back(LSRFixup()); |
1236 |
Fixups.push_back(LSRFixup()); |
| 1237 |
return Fixups.back(); |
1237 |
return Fixups.back(); |
| 1238 |
} |
1238 |
} |
| 1239 |
|
1239 |
|
| 1240 |
void pushFixup(LSRFixup &f) { |
1240 |
void pushFixup(LSRFixup &f) { |
| 1241 |
Fixups.push_back(f); |
1241 |
Fixups.push_back(f); |
| 1242 |
if (f.Offset > MaxOffset) |
1242 |
if (f.Offset > MaxOffset) |
| 1243 |
MaxOffset = f.Offset; |
1243 |
MaxOffset = f.Offset; |
| 1244 |
if (f.Offset < MinOffset) |
1244 |
if (f.Offset < MinOffset) |
| 1245 |
MinOffset = f.Offset; |
1245 |
MinOffset = f.Offset; |
| 1246 |
} |
1246 |
} |
| 1247 |
|
1247 |
|
| 1248 |
bool HasFormulaWithSameRegs(const Formula &F) const; |
1248 |
bool HasFormulaWithSameRegs(const Formula &F) const; |
| 1249 |
float getNotSelectedProbability(const SCEV *Reg) const; |
1249 |
float getNotSelectedProbability(const SCEV *Reg) const; |
| 1250 |
bool InsertFormula(const Formula &F, const Loop &L); |
1250 |
bool InsertFormula(const Formula &F, const Loop &L); |
| 1251 |
void DeleteFormula(Formula &F); |
1251 |
void DeleteFormula(Formula &F); |
| 1252 |
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); |
1252 |
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); |
| 1253 |
|
1253 |
|
| 1254 |
void print(raw_ostream &OS) const; |
1254 |
void print(raw_ostream &OS) const; |
| 1255 |
void dump() const; |
1255 |
void dump() const; |
| 1256 |
}; |
1256 |
}; |
| 1257 |
|
1257 |
|
| 1258 |
} // end anonymous namespace |
1258 |
} // end anonymous namespace |
| 1259 |
|
1259 |
|
| 1260 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
1260 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
| 1261 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
1261 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
| 1262 |
GlobalValue *BaseGV, int64_t BaseOffset, |
1262 |
GlobalValue *BaseGV, int64_t BaseOffset, |
| 1263 |
bool HasBaseReg, int64_t Scale, |
1263 |
bool HasBaseReg, int64_t Scale, |
| 1264 |
Instruction *Fixup = nullptr); |
1264 |
Instruction *Fixup = nullptr); |
| 1265 |
|
1265 |
|
| 1266 |
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { |
1266 |
static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { |
| 1267 |
if (isa(Reg) || isa(Reg)) |
1267 |
if (isa(Reg) || isa(Reg)) |
| 1268 |
return 1; |
1268 |
return 1; |
| 1269 |
if (Depth == 0) |
1269 |
if (Depth == 0) |
| 1270 |
return 0; |
1270 |
return 0; |
| 1271 |
if (const auto *S = dyn_cast(Reg)) |
1271 |
if (const auto *S = dyn_cast(Reg)) |
| 1272 |
return getSetupCost(S->getStart(), Depth - 1); |
1272 |
return getSetupCost(S->getStart(), Depth - 1); |
| 1273 |
if (auto S = dyn_cast(Reg)) |
1273 |
if (auto S = dyn_cast(Reg)) |
| 1274 |
return getSetupCost(S->getOperand(), Depth - 1); |
1274 |
return getSetupCost(S->getOperand(), Depth - 1); |
| 1275 |
if (auto S = dyn_cast(Reg)) |
1275 |
if (auto S = dyn_cast(Reg)) |
| 1276 |
return std::accumulate(S->operands().begin(), S->operands().end(), 0, |
1276 |
return std::accumulate(S->operands().begin(), S->operands().end(), 0, |
| 1277 |
[&](unsigned i, const SCEV *Reg) { |
1277 |
[&](unsigned i, const SCEV *Reg) { |
| 1278 |
return i + getSetupCost(Reg, Depth - 1); |
1278 |
return i + getSetupCost(Reg, Depth - 1); |
| 1279 |
}); |
1279 |
}); |
| 1280 |
if (auto S = dyn_cast(Reg)) |
1280 |
if (auto S = dyn_cast(Reg)) |
| 1281 |
return getSetupCost(S->getLHS(), Depth - 1) + |
1281 |
return getSetupCost(S->getLHS(), Depth - 1) + |
| 1282 |
getSetupCost(S->getRHS(), Depth - 1); |
1282 |
getSetupCost(S->getRHS(), Depth - 1); |
| 1283 |
return 0; |
1283 |
return 0; |
| 1284 |
} |
1284 |
} |
| 1285 |
|
1285 |
|
| 1286 |
/// Tally up interesting quantities from the given register. |
1286 |
/// Tally up interesting quantities from the given register. |
| 1287 |
void Cost::RateRegister(const Formula &F, const SCEV *Reg, |
1287 |
void Cost::RateRegister(const Formula &F, const SCEV *Reg, |
| 1288 |
SmallPtrSetImpl &Regs) { |
1288 |
SmallPtrSetImpl &Regs) { |
| 1289 |
if (const SCEVAddRecExpr *AR = dyn_cast(Reg)) { |
1289 |
if (const SCEVAddRecExpr *AR = dyn_cast(Reg)) { |
| 1290 |
// If this is an addrec for another loop, it should be an invariant |
1290 |
// If this is an addrec for another loop, it should be an invariant |
| 1291 |
// with respect to L since L is the innermost loop (at least |
1291 |
// with respect to L since L is the innermost loop (at least |
| 1292 |
// for now LSR only handles innermost loops). |
1292 |
// for now LSR only handles innermost loops). |
| 1293 |
if (AR->getLoop() != L) { |
1293 |
if (AR->getLoop() != L) { |
| 1294 |
// If the AddRec exists, consider it's register free and leave it alone. |
1294 |
// If the AddRec exists, consider it's register free and leave it alone. |
| 1295 |
if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed) |
1295 |
if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed) |
| 1296 |
return; |
1296 |
return; |
| 1297 |
|
1297 |
|
| 1298 |
// It is bad to allow LSR for current loop to add induction variables |
1298 |
// It is bad to allow LSR for current loop to add induction variables |
| 1299 |
// for its sibling loops. |
1299 |
// for its sibling loops. |
| 1300 |
if (!AR->getLoop()->contains(L)) { |
1300 |
if (!AR->getLoop()->contains(L)) { |
| 1301 |
Lose(); |
1301 |
Lose(); |
| 1302 |
return; |
1302 |
return; |
| 1303 |
} |
1303 |
} |
| 1304 |
|
1304 |
|
| 1305 |
// Otherwise, it will be an invariant with respect to Loop L. |
1305 |
// Otherwise, it will be an invariant with respect to Loop L. |
| 1306 |
++C.NumRegs; |
1306 |
++C.NumRegs; |
| 1307 |
return; |
1307 |
return; |
| 1308 |
} |
1308 |
} |
| 1309 |
|
1309 |
|
| 1310 |
unsigned LoopCost = 1; |
1310 |
unsigned LoopCost = 1; |
| 1311 |
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) || |
1311 |
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) || |
| 1312 |
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) { |
1312 |
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) { |
| 1313 |
|
1313 |
|
| 1314 |
// If the step size matches the base offset, we could use pre-indexed |
1314 |
// If the step size matches the base offset, we could use pre-indexed |
| 1315 |
// addressing. |
1315 |
// addressing. |
| 1316 |
if (AMK == TTI::AMK_PreIndexed) { |
1316 |
if (AMK == TTI::AMK_PreIndexed) { |
| 1317 |
if (auto *Step = dyn_cast(AR->getStepRecurrence(*SE))) |
1317 |
if (auto *Step = dyn_cast(AR->getStepRecurrence(*SE))) |
| 1318 |
if (Step->getAPInt() == F.BaseOffset) |
1318 |
if (Step->getAPInt() == F.BaseOffset) |
| 1319 |
LoopCost = 0; |
1319 |
LoopCost = 0; |
| 1320 |
} else if (AMK == TTI::AMK_PostIndexed) { |
1320 |
} else if (AMK == TTI::AMK_PostIndexed) { |
| 1321 |
const SCEV *LoopStep = AR->getStepRecurrence(*SE); |
1321 |
const SCEV *LoopStep = AR->getStepRecurrence(*SE); |
| 1322 |
if (isa(LoopStep)) { |
1322 |
if (isa(LoopStep)) { |
| 1323 |
const SCEV *LoopStart = AR->getStart(); |
1323 |
const SCEV *LoopStart = AR->getStart(); |
| 1324 |
if (!isa(LoopStart) && |
1324 |
if (!isa(LoopStart) && |
| 1325 |
SE->isLoopInvariant(LoopStart, L)) |
1325 |
SE->isLoopInvariant(LoopStart, L)) |
| 1326 |
LoopCost = 0; |
1326 |
LoopCost = 0; |
| 1327 |
} |
1327 |
} |
| 1328 |
} |
1328 |
} |
| 1329 |
} |
1329 |
} |
| 1330 |
C.AddRecCost += LoopCost; |
1330 |
C.AddRecCost += LoopCost; |
| 1331 |
|
1331 |
|
| 1332 |
// Add the step value register, if it needs one. |
1332 |
// Add the step value register, if it needs one. |
| 1333 |
// TODO: The non-affine case isn't precisely modeled here. |
1333 |
// TODO: The non-affine case isn't precisely modeled here. |
| 1334 |
if (!AR->isAffine() || !isa(AR->getOperand(1))) { |
1334 |
if (!AR->isAffine() || !isa(AR->getOperand(1))) { |
| 1335 |
if (!Regs.count(AR->getOperand(1))) { |
1335 |
if (!Regs.count(AR->getOperand(1))) { |
| 1336 |
RateRegister(F, AR->getOperand(1), Regs); |
1336 |
RateRegister(F, AR->getOperand(1), Regs); |
| 1337 |
if (isLoser()) |
1337 |
if (isLoser()) |
| 1338 |
return; |
1338 |
return; |
| 1339 |
} |
1339 |
} |
| 1340 |
} |
1340 |
} |
| 1341 |
} |
1341 |
} |
| 1342 |
++C.NumRegs; |
1342 |
++C.NumRegs; |
| 1343 |
|
1343 |
|
| 1344 |
// Rough heuristic; favor registers which don't require extra setup |
1344 |
// Rough heuristic; favor registers which don't require extra setup |
| 1345 |
// instructions in the preheader. |
1345 |
// instructions in the preheader. |
| 1346 |
C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit); |
1346 |
C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit); |
| 1347 |
// Ensure we don't, even with the recusion limit, produce invalid costs. |
1347 |
// Ensure we don't, even with the recusion limit, produce invalid costs. |
| 1348 |
C.SetupCost = std::min(C.SetupCost, 1 << 16); |
1348 |
C.SetupCost = std::min(C.SetupCost, 1 << 16); |
| 1349 |
|
1349 |
|
| 1350 |
C.NumIVMuls += isa(Reg) && |
1350 |
C.NumIVMuls += isa(Reg) && |
| 1351 |
SE->hasComputableLoopEvolution(Reg, L); |
1351 |
SE->hasComputableLoopEvolution(Reg, L); |
| 1352 |
} |
1352 |
} |
| 1353 |
|
1353 |
|
| 1354 |
/// Record this register in the set. If we haven't seen it before, rate |
1354 |
/// Record this register in the set. If we haven't seen it before, rate |
| 1355 |
/// it. Optional LoserRegs provides a way to declare any formula that refers to |
1355 |
/// it. Optional LoserRegs provides a way to declare any formula that refers to |
| 1356 |
/// one of those regs an instant loser. |
1356 |
/// one of those regs an instant loser. |
| 1357 |
void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg, |
1357 |
void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg, |
| 1358 |
SmallPtrSetImpl &Regs, |
1358 |
SmallPtrSetImpl &Regs, |
| 1359 |
SmallPtrSetImpl *LoserRegs) { |
1359 |
SmallPtrSetImpl *LoserRegs) { |
| 1360 |
if (LoserRegs && LoserRegs->count(Reg)) { |
1360 |
if (LoserRegs && LoserRegs->count(Reg)) { |
| 1361 |
Lose(); |
1361 |
Lose(); |
| 1362 |
return; |
1362 |
return; |
| 1363 |
} |
1363 |
} |
| 1364 |
if (Regs.insert(Reg).second) { |
1364 |
if (Regs.insert(Reg).second) { |
| 1365 |
RateRegister(F, Reg, Regs); |
1365 |
RateRegister(F, Reg, Regs); |
| 1366 |
if (LoserRegs && isLoser()) |
1366 |
if (LoserRegs && isLoser()) |
| 1367 |
LoserRegs->insert(Reg); |
1367 |
LoserRegs->insert(Reg); |
| 1368 |
} |
1368 |
} |
| 1369 |
} |
1369 |
} |
| 1370 |
|
1370 |
|
| 1371 |
void Cost::RateFormula(const Formula &F, |
1371 |
void Cost::RateFormula(const Formula &F, |
| 1372 |
SmallPtrSetImpl &Regs, |
1372 |
SmallPtrSetImpl &Regs, |
| 1373 |
const DenseSet &VisitedRegs, |
1373 |
const DenseSet &VisitedRegs, |
| 1374 |
const LSRUse &LU, |
1374 |
const LSRUse &LU, |
| 1375 |
SmallPtrSetImpl *LoserRegs) { |
1375 |
SmallPtrSetImpl *LoserRegs) { |
| 1376 |
if (isLoser()) |
1376 |
if (isLoser()) |
| 1377 |
return; |
1377 |
return; |
| 1378 |
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); |
1378 |
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); |
| 1379 |
// Tally up the registers. |
1379 |
// Tally up the registers. |
| 1380 |
unsigned PrevAddRecCost = C.AddRecCost; |
1380 |
unsigned PrevAddRecCost = C.AddRecCost; |
| 1381 |
unsigned PrevNumRegs = C.NumRegs; |
1381 |
unsigned PrevNumRegs = C.NumRegs; |
| 1382 |
unsigned PrevNumBaseAdds = C.NumBaseAdds; |
1382 |
unsigned PrevNumBaseAdds = C.NumBaseAdds; |
| 1383 |
if (const SCEV *ScaledReg = F.ScaledReg) { |
1383 |
if (const SCEV *ScaledReg = F.ScaledReg) { |
| 1384 |
if (VisitedRegs.count(ScaledReg)) { |
1384 |
if (VisitedRegs.count(ScaledReg)) { |
| 1385 |
Lose(); |
1385 |
Lose(); |
| 1386 |
return; |
1386 |
return; |
| 1387 |
} |
1387 |
} |
| 1388 |
RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs); |
1388 |
RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs); |
| 1389 |
if (isLoser()) |
1389 |
if (isLoser()) |
| 1390 |
return; |
1390 |
return; |
| 1391 |
} |
1391 |
} |
| 1392 |
for (const SCEV *BaseReg : F.BaseRegs) { |
1392 |
for (const SCEV *BaseReg : F.BaseRegs) { |
| 1393 |
if (VisitedRegs.count(BaseReg)) { |
1393 |
if (VisitedRegs.count(BaseReg)) { |
| 1394 |
Lose(); |
1394 |
Lose(); |
| 1395 |
return; |
1395 |
return; |
| 1396 |
} |
1396 |
} |
| 1397 |
RatePrimaryRegister(F, BaseReg, Regs, LoserRegs); |
1397 |
RatePrimaryRegister(F, BaseReg, Regs, LoserRegs); |
| 1398 |
if (isLoser()) |
1398 |
if (isLoser()) |
| 1399 |
return; |
1399 |
return; |
| 1400 |
} |
1400 |
} |
| 1401 |
|
1401 |
|
| 1402 |
// Determine how many (unfolded) adds we'll need inside the loop. |
1402 |
// Determine how many (unfolded) adds we'll need inside the loop. |
| 1403 |
size_t NumBaseParts = F.getNumRegs(); |
1403 |
size_t NumBaseParts = F.getNumRegs(); |
| 1404 |
if (NumBaseParts > 1) |
1404 |
if (NumBaseParts > 1) |
| 1405 |
// Do not count the base and a possible second register if the target |
1405 |
// Do not count the base and a possible second register if the target |
| 1406 |
// allows to fold 2 registers. |
1406 |
// allows to fold 2 registers. |
| 1407 |
C.NumBaseAdds += |
1407 |
C.NumBaseAdds += |
| 1408 |
NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F))); |
1408 |
NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F))); |
| 1409 |
C.NumBaseAdds += (F.UnfoldedOffset != 0); |
1409 |
C.NumBaseAdds += (F.UnfoldedOffset != 0); |
| 1410 |
|
1410 |
|
| 1411 |
// Accumulate non-free scaling amounts. |
1411 |
// Accumulate non-free scaling amounts. |
| 1412 |
C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue(); |
1412 |
C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue(); |
| 1413 |
|
1413 |
|
| 1414 |
// Tally up the non-zero immediates. |
1414 |
// Tally up the non-zero immediates. |
| 1415 |
for (const LSRFixup &Fixup : LU.Fixups) { |
1415 |
for (const LSRFixup &Fixup : LU.Fixups) { |
| 1416 |
int64_t O = Fixup.Offset; |
1416 |
int64_t O = Fixup.Offset; |
| 1417 |
int64_t Offset = (uint64_t)O + F.BaseOffset; |
1417 |
int64_t Offset = (uint64_t)O + F.BaseOffset; |
| 1418 |
if (F.BaseGV) |
1418 |
if (F.BaseGV) |
| 1419 |
C.ImmCost += 64; // Handle symbolic values conservatively. |
1419 |
C.ImmCost += 64; // Handle symbolic values conservatively. |
| 1420 |
// TODO: This should probably be the pointer size. |
1420 |
// TODO: This should probably be the pointer size. |
| 1421 |
else if (Offset != 0) |
1421 |
else if (Offset != 0) |
| 1422 |
C.ImmCost += APInt(64, Offset, true).getSignificantBits(); |
1422 |
C.ImmCost += APInt(64, Offset, true).getSignificantBits(); |
| 1423 |
|
1423 |
|
| 1424 |
// Check with target if this offset with this instruction is |
1424 |
// Check with target if this offset with this instruction is |
| 1425 |
// specifically not supported. |
1425 |
// specifically not supported. |
| 1426 |
if (LU.Kind == LSRUse::Address && Offset != 0 && |
1426 |
if (LU.Kind == LSRUse::Address && Offset != 0 && |
| 1427 |
!isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, |
1427 |
!isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, |
| 1428 |
Offset, F.HasBaseReg, F.Scale, Fixup.UserInst)) |
1428 |
Offset, F.HasBaseReg, F.Scale, Fixup.UserInst)) |
| 1429 |
C.NumBaseAdds++; |
1429 |
C.NumBaseAdds++; |
| 1430 |
} |
1430 |
} |
| 1431 |
|
1431 |
|
| 1432 |
// If we don't count instruction cost exit here. |
1432 |
// If we don't count instruction cost exit here. |
| 1433 |
if (!InsnsCost) { |
1433 |
if (!InsnsCost) { |
| 1434 |
assert(isValid() && "invalid cost"); |
1434 |
assert(isValid() && "invalid cost"); |
| 1435 |
return; |
1435 |
return; |
| 1436 |
} |
1436 |
} |
| 1437 |
|
1437 |
|
| 1438 |
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as |
1438 |
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as |
| 1439 |
// additional instruction (at least fill). |
1439 |
// additional instruction (at least fill). |
| 1440 |
// TODO: Need distinguish register class? |
1440 |
// TODO: Need distinguish register class? |
| 1441 |
unsigned TTIRegNum = TTI->getNumberOfRegisters( |
1441 |
unsigned TTIRegNum = TTI->getNumberOfRegisters( |
| 1442 |
TTI->getRegisterClassForType(false, F.getType())) - 1; |
1442 |
TTI->getRegisterClassForType(false, F.getType())) - 1; |
| 1443 |
if (C.NumRegs > TTIRegNum) { |
1443 |
if (C.NumRegs > TTIRegNum) { |
| 1444 |
// Cost already exceeded TTIRegNum, then only newly added register can add |
1444 |
// Cost already exceeded TTIRegNum, then only newly added register can add |
| 1445 |
// new instructions. |
1445 |
// new instructions. |
| 1446 |
if (PrevNumRegs > TTIRegNum) |
1446 |
if (PrevNumRegs > TTIRegNum) |
| 1447 |
C.Insns += (C.NumRegs - PrevNumRegs); |
1447 |
C.Insns += (C.NumRegs - PrevNumRegs); |
| 1448 |
else |
1448 |
else |
| 1449 |
C.Insns += (C.NumRegs - TTIRegNum); |
1449 |
C.Insns += (C.NumRegs - TTIRegNum); |
| 1450 |
} |
1450 |
} |
| 1451 |
|
1451 |
|
| 1452 |
// If ICmpZero formula ends with not 0, it could not be replaced by |
1452 |
// If ICmpZero formula ends with not 0, it could not be replaced by |
| 1453 |
// just add or sub. We'll need to compare final result of AddRec. |
1453 |
// just add or sub. We'll need to compare final result of AddRec. |
| 1454 |
// That means we'll need an additional instruction. But if the target can |
1454 |
// That means we'll need an additional instruction. But if the target can |
| 1455 |
// macro-fuse a compare with a branch, don't count this extra instruction. |
1455 |
// macro-fuse a compare with a branch, don't count this extra instruction. |
| 1456 |
// For -10 + {0, +, 1}: |
1456 |
// For -10 + {0, +, 1}: |
| 1457 |
// i = i + 1; |
1457 |
// i = i + 1; |
| 1458 |
// cmp i, 10 |
1458 |
// cmp i, 10 |
| 1459 |
// |
1459 |
// |
| 1460 |
// For {-10, +, 1}: |
1460 |
// For {-10, +, 1}: |
| 1461 |
// i = i + 1; |
1461 |
// i = i + 1; |
| 1462 |
if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && |
1462 |
if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && |
| 1463 |
!TTI->canMacroFuseCmp()) |
1463 |
!TTI->canMacroFuseCmp()) |
| 1464 |
C.Insns++; |
1464 |
C.Insns++; |
| 1465 |
// Each new AddRec adds 1 instruction to calculation. |
1465 |
// Each new AddRec adds 1 instruction to calculation. |
| 1466 |
C.Insns += (C.AddRecCost - PrevAddRecCost); |
1466 |
C.Insns += (C.AddRecCost - PrevAddRecCost); |
| 1467 |
|
1467 |
|
| 1468 |
// BaseAdds adds instructions for unfolded registers. |
1468 |
// BaseAdds adds instructions for unfolded registers. |
| 1469 |
if (LU.Kind != LSRUse::ICmpZero) |
1469 |
if (LU.Kind != LSRUse::ICmpZero) |
| 1470 |
C.Insns += C.NumBaseAdds - PrevNumBaseAdds; |
1470 |
C.Insns += C.NumBaseAdds - PrevNumBaseAdds; |
| 1471 |
assert(isValid() && "invalid cost"); |
1471 |
assert(isValid() && "invalid cost"); |
| 1472 |
} |
1472 |
} |
| 1473 |
|
1473 |
|
| 1474 |
/// Set this cost to a losing value. |
1474 |
/// Set this cost to a losing value. |
| 1475 |
void Cost::Lose() { |
1475 |
void Cost::Lose() { |
| 1476 |
C.Insns = std::numeric_limits::max(); |
1476 |
C.Insns = std::numeric_limits::max(); |
| 1477 |
C.NumRegs = std::numeric_limits::max(); |
1477 |
C.NumRegs = std::numeric_limits::max(); |
| 1478 |
C.AddRecCost = std::numeric_limits::max(); |
1478 |
C.AddRecCost = std::numeric_limits::max(); |
| 1479 |
C.NumIVMuls = std::numeric_limits::max(); |
1479 |
C.NumIVMuls = std::numeric_limits::max(); |
| 1480 |
C.NumBaseAdds = std::numeric_limits::max(); |
1480 |
C.NumBaseAdds = std::numeric_limits::max(); |
| 1481 |
C.ImmCost = std::numeric_limits::max(); |
1481 |
C.ImmCost = std::numeric_limits::max(); |
| 1482 |
C.SetupCost = std::numeric_limits::max(); |
1482 |
C.SetupCost = std::numeric_limits::max(); |
| 1483 |
C.ScaleCost = std::numeric_limits::max(); |
1483 |
C.ScaleCost = std::numeric_limits::max(); |
| 1484 |
} |
1484 |
} |
| 1485 |
|
1485 |
|
| 1486 |
/// Choose the lower cost. |
1486 |
/// Choose the lower cost. |
| 1487 |
bool Cost::isLess(const Cost &Other) const { |
1487 |
bool Cost::isLess(const Cost &Other) const { |
| 1488 |
if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && |
1488 |
if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && |
| 1489 |
C.Insns != Other.C.Insns) |
1489 |
C.Insns != Other.C.Insns) |
| 1490 |
return C.Insns < Other.C.Insns; |
1490 |
return C.Insns < Other.C.Insns; |
| 1491 |
return TTI->isLSRCostLess(C, Other.C); |
1491 |
return TTI->isLSRCostLess(C, Other.C); |
| 1492 |
} |
1492 |
} |
| 1493 |
|
1493 |
|
| 1494 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1494 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 1495 |
void Cost::print(raw_ostream &OS) const { |
1495 |
void Cost::print(raw_ostream &OS) const { |
| 1496 |
if (InsnsCost) |
1496 |
if (InsnsCost) |
| 1497 |
OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); |
1497 |
OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); |
| 1498 |
OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); |
1498 |
OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); |
| 1499 |
if (C.AddRecCost != 0) |
1499 |
if (C.AddRecCost != 0) |
| 1500 |
OS << ", with addrec cost " << C.AddRecCost; |
1500 |
OS << ", with addrec cost " << C.AddRecCost; |
| 1501 |
if (C.NumIVMuls != 0) |
1501 |
if (C.NumIVMuls != 0) |
| 1502 |
OS << ", plus " << C.NumIVMuls << " IV mul" |
1502 |
OS << ", plus " << C.NumIVMuls << " IV mul" |
| 1503 |
<< (C.NumIVMuls == 1 ? "" : "s"); |
1503 |
<< (C.NumIVMuls == 1 ? "" : "s"); |
| 1504 |
if (C.NumBaseAdds != 0) |
1504 |
if (C.NumBaseAdds != 0) |
| 1505 |
OS << ", plus " << C.NumBaseAdds << " base add" |
1505 |
OS << ", plus " << C.NumBaseAdds << " base add" |
| 1506 |
<< (C.NumBaseAdds == 1 ? "" : "s"); |
1506 |
<< (C.NumBaseAdds == 1 ? "" : "s"); |
| 1507 |
if (C.ScaleCost != 0) |
1507 |
if (C.ScaleCost != 0) |
| 1508 |
OS << ", plus " << C.ScaleCost << " scale cost"; |
1508 |
OS << ", plus " << C.ScaleCost << " scale cost"; |
| 1509 |
if (C.ImmCost != 0) |
1509 |
if (C.ImmCost != 0) |
| 1510 |
OS << ", plus " << C.ImmCost << " imm cost"; |
1510 |
OS << ", plus " << C.ImmCost << " imm cost"; |
| 1511 |
if (C.SetupCost != 0) |
1511 |
if (C.SetupCost != 0) |
| 1512 |
OS << ", plus " << C.SetupCost << " setup cost"; |
1512 |
OS << ", plus " << C.SetupCost << " setup cost"; |
| 1513 |
} |
1513 |
} |
| 1514 |
|
1514 |
|
| 1515 |
LLVM_DUMP_METHOD void Cost::dump() const { |
1515 |
LLVM_DUMP_METHOD void Cost::dump() const { |
| 1516 |
print(errs()); errs() << '\n'; |
1516 |
print(errs()); errs() << '\n'; |
| 1517 |
} |
1517 |
} |
| 1518 |
#endif |
1518 |
#endif |
| 1519 |
|
1519 |
|
| 1520 |
/// Test whether this fixup always uses its value outside of the given loop. |
1520 |
/// Test whether this fixup always uses its value outside of the given loop. |
| 1521 |
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { |
1521 |
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { |
| 1522 |
// PHI nodes use their value in their incoming blocks. |
1522 |
// PHI nodes use their value in their incoming blocks. |
| 1523 |
if (const PHINode *PN = dyn_cast(UserInst)) { |
1523 |
if (const PHINode *PN = dyn_cast(UserInst)) { |
| 1524 |
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
1524 |
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
| 1525 |
if (PN->getIncomingValue(i) == OperandValToReplace && |
1525 |
if (PN->getIncomingValue(i) == OperandValToReplace && |
| 1526 |
L->contains(PN->getIncomingBlock(i))) |
1526 |
L->contains(PN->getIncomingBlock(i))) |
| 1527 |
return false; |
1527 |
return false; |
| 1528 |
return true; |
1528 |
return true; |
| 1529 |
} |
1529 |
} |
| 1530 |
|
1530 |
|
| 1531 |
return !L->contains(UserInst); |
1531 |
return !L->contains(UserInst); |
| 1532 |
} |
1532 |
} |
| 1533 |
|
1533 |
|
| 1534 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1534 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 1535 |
void LSRFixup::print(raw_ostream &OS) const { |
1535 |
void LSRFixup::print(raw_ostream &OS) const { |
| 1536 |
OS << "UserInst="; |
1536 |
OS << "UserInst="; |
| 1537 |
// Store is common and interesting enough to be worth special-casing. |
1537 |
// Store is common and interesting enough to be worth special-casing. |
| 1538 |
if (StoreInst *Store = dyn_cast(UserInst)) { |
1538 |
if (StoreInst *Store = dyn_cast(UserInst)) { |
| 1539 |
OS << "store "; |
1539 |
OS << "store "; |
| 1540 |
Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); |
1540 |
Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); |
| 1541 |
} else if (UserInst->getType()->isVoidTy()) |
1541 |
} else if (UserInst->getType()->isVoidTy()) |
| 1542 |
OS << UserInst->getOpcodeName(); |
1542 |
OS << UserInst->getOpcodeName(); |
| 1543 |
else |
1543 |
else |
| 1544 |
UserInst->printAsOperand(OS, /*PrintType=*/false); |
1544 |
UserInst->printAsOperand(OS, /*PrintType=*/false); |
| 1545 |
|
1545 |
|
| 1546 |
OS << ", OperandValToReplace="; |
1546 |
OS << ", OperandValToReplace="; |
| 1547 |
OperandValToReplace->printAsOperand(OS, /*PrintType=*/false); |
1547 |
OperandValToReplace->printAsOperand(OS, /*PrintType=*/false); |
| 1548 |
|
1548 |
|
| 1549 |
for (const Loop *PIL : PostIncLoops) { |
1549 |
for (const Loop *PIL : PostIncLoops) { |
| 1550 |
OS << ", PostIncLoop="; |
1550 |
OS << ", PostIncLoop="; |
| 1551 |
PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
1551 |
PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false); |
| 1552 |
} |
1552 |
} |
| 1553 |
|
1553 |
|
| 1554 |
if (Offset != 0) |
1554 |
if (Offset != 0) |
| 1555 |
OS << ", Offset=" << Offset; |
1555 |
OS << ", Offset=" << Offset; |
| 1556 |
} |
1556 |
} |
| 1557 |
|
1557 |
|
| 1558 |
LLVM_DUMP_METHOD void LSRFixup::dump() const { |
1558 |
LLVM_DUMP_METHOD void LSRFixup::dump() const { |
| 1559 |
print(errs()); errs() << '\n'; |
1559 |
print(errs()); errs() << '\n'; |
| 1560 |
} |
1560 |
} |
| 1561 |
#endif |
1561 |
#endif |
| 1562 |
|
1562 |
|
| 1563 |
/// Test whether this use as a formula which has the same registers as the given |
1563 |
/// Test whether this use as a formula which has the same registers as the given |
| 1564 |
/// formula. |
1564 |
/// formula. |
| 1565 |
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { |
1565 |
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { |
| 1566 |
SmallVector Key = F.BaseRegs; |
1566 |
SmallVector Key = F.BaseRegs; |
| 1567 |
if (F.ScaledReg) Key.push_back(F.ScaledReg); |
1567 |
if (F.ScaledReg) Key.push_back(F.ScaledReg); |
| 1568 |
// Unstable sort by host order ok, because this is only used for uniquifying. |
1568 |
// Unstable sort by host order ok, because this is only used for uniquifying. |
| 1569 |
llvm::sort(Key); |
1569 |
llvm::sort(Key); |
| 1570 |
return Uniquifier.count(Key); |
1570 |
return Uniquifier.count(Key); |
| 1571 |
} |
1571 |
} |
| 1572 |
|
1572 |
|
| 1573 |
/// The function returns a probability of selecting formula without Reg. |
1573 |
/// The function returns a probability of selecting formula without Reg. |
| 1574 |
float LSRUse::getNotSelectedProbability(const SCEV *Reg) const { |
1574 |
float LSRUse::getNotSelectedProbability(const SCEV *Reg) const { |
| 1575 |
unsigned FNum = 0; |
1575 |
unsigned FNum = 0; |
| 1576 |
for (const Formula &F : Formulae) |
1576 |
for (const Formula &F : Formulae) |
| 1577 |
if (F.referencesReg(Reg)) |
1577 |
if (F.referencesReg(Reg)) |
| 1578 |
FNum++; |
1578 |
FNum++; |
| 1579 |
return ((float)(Formulae.size() - FNum)) / Formulae.size(); |
1579 |
return ((float)(Formulae.size() - FNum)) / Formulae.size(); |
| 1580 |
} |
1580 |
} |
| 1581 |
|
1581 |
|
| 1582 |
/// If the given formula has not yet been inserted, add it to the list, and |
1582 |
/// If the given formula has not yet been inserted, add it to the list, and |
| 1583 |
/// return true. Return false otherwise. The formula must be in canonical form. |
1583 |
/// return true. Return false otherwise. The formula must be in canonical form. |
| 1584 |
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) { |
1584 |
bool LSRUse::InsertFormula(const Formula &F, const Loop &L) { |
| 1585 |
assert(F.isCanonical(L) && "Invalid canonical representation"); |
1585 |
assert(F.isCanonical(L) && "Invalid canonical representation"); |
| 1586 |
|
1586 |
|
| 1587 |
if (!Formulae.empty() && RigidFormula) |
1587 |
if (!Formulae.empty() && RigidFormula) |
| 1588 |
return false; |
1588 |
return false; |
| 1589 |
|
1589 |
|
| 1590 |
SmallVector Key = F.BaseRegs; |
1590 |
SmallVector Key = F.BaseRegs; |
| 1591 |
if (F.ScaledReg) Key.push_back(F.ScaledReg); |
1591 |
if (F.ScaledReg) Key.push_back(F.ScaledReg); |
| 1592 |
// Unstable sort by host order ok, because this is only used for uniquifying. |
1592 |
// Unstable sort by host order ok, because this is only used for uniquifying. |
| 1593 |
llvm::sort(Key); |
1593 |
llvm::sort(Key); |
| 1594 |
|
1594 |
|
| 1595 |
if (!Uniquifier.insert(Key).second) |
1595 |
if (!Uniquifier.insert(Key).second) |
| 1596 |
return false; |
1596 |
return false; |
| 1597 |
|
1597 |
|
| 1598 |
// Using a register to hold the value of 0 is not profitable. |
1598 |
// Using a register to hold the value of 0 is not profitable. |
| 1599 |
assert((!F.ScaledReg || !F.ScaledReg->isZero()) && |
1599 |
assert((!F.ScaledReg || !F.ScaledReg->isZero()) && |
| 1600 |
"Zero allocated in a scaled register!"); |
1600 |
"Zero allocated in a scaled register!"); |
| 1601 |
#ifndef NDEBUG |
1601 |
#ifndef NDEBUG |
| 1602 |
for (const SCEV *BaseReg : F.BaseRegs) |
1602 |
for (const SCEV *BaseReg : F.BaseRegs) |
| 1603 |
assert(!BaseReg->isZero() && "Zero allocated in a base register!"); |
1603 |
assert(!BaseReg->isZero() && "Zero allocated in a base register!"); |
| 1604 |
#endif |
1604 |
#endif |
| 1605 |
|
1605 |
|
| 1606 |
// Add the formula to the list. |
1606 |
// Add the formula to the list. |
| 1607 |
Formulae.push_back(F); |
1607 |
Formulae.push_back(F); |
| 1608 |
|
1608 |
|
| 1609 |
// Record registers now being used by this use. |
1609 |
// Record registers now being used by this use. |
| 1610 |
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); |
1610 |
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); |
| 1611 |
if (F.ScaledReg) |
1611 |
if (F.ScaledReg) |
| 1612 |
Regs.insert(F.ScaledReg); |
1612 |
Regs.insert(F.ScaledReg); |
| 1613 |
|
1613 |
|
| 1614 |
return true; |
1614 |
return true; |
| 1615 |
} |
1615 |
} |
| 1616 |
|
1616 |
|
| 1617 |
/// Remove the given formula from this use's list. |
1617 |
/// Remove the given formula from this use's list. |
| 1618 |
void LSRUse::DeleteFormula(Formula &F) { |
1618 |
void LSRUse::DeleteFormula(Formula &F) { |
| 1619 |
if (&F != &Formulae.back()) |
1619 |
if (&F != &Formulae.back()) |
| 1620 |
std::swap(F, Formulae.back()); |
1620 |
std::swap(F, Formulae.back()); |
| 1621 |
Formulae.pop_back(); |
1621 |
Formulae.pop_back(); |
| 1622 |
} |
1622 |
} |
| 1623 |
|
1623 |
|
| 1624 |
/// Recompute the Regs field, and update RegUses. |
1624 |
/// Recompute the Regs field, and update RegUses. |
| 1625 |
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { |
1625 |
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { |
| 1626 |
// Now that we've filtered out some formulae, recompute the Regs set. |
1626 |
// Now that we've filtered out some formulae, recompute the Regs set. |
| 1627 |
SmallPtrSet OldRegs = std::move(Regs); |
1627 |
SmallPtrSet OldRegs = std::move(Regs); |
| 1628 |
Regs.clear(); |
1628 |
Regs.clear(); |
| 1629 |
for (const Formula &F : Formulae) { |
1629 |
for (const Formula &F : Formulae) { |
| 1630 |
if (F.ScaledReg) Regs.insert(F.ScaledReg); |
1630 |
if (F.ScaledReg) Regs.insert(F.ScaledReg); |
| 1631 |
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); |
1631 |
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); |
| 1632 |
} |
1632 |
} |
| 1633 |
|
1633 |
|
| 1634 |
// Update the RegTracker. |
1634 |
// Update the RegTracker. |
| 1635 |
for (const SCEV *S : OldRegs) |
1635 |
for (const SCEV *S : OldRegs) |
| 1636 |
if (!Regs.count(S)) |
1636 |
if (!Regs.count(S)) |
| 1637 |
RegUses.dropRegister(S, LUIdx); |
1637 |
RegUses.dropRegister(S, LUIdx); |
| 1638 |
} |
1638 |
} |
| 1639 |
|
1639 |
|
| 1640 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1640 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 1641 |
void LSRUse::print(raw_ostream &OS) const { |
1641 |
void LSRUse::print(raw_ostream &OS) const { |
| 1642 |
OS << "LSR Use: Kind="; |
1642 |
OS << "LSR Use: Kind="; |
| 1643 |
switch (Kind) { |
1643 |
switch (Kind) { |
| 1644 |
case Basic: OS << "Basic"; break; |
1644 |
case Basic: OS << "Basic"; break; |
| 1645 |
case Special: OS << "Special"; break; |
1645 |
case Special: OS << "Special"; break; |
| 1646 |
case ICmpZero: OS << "ICmpZero"; break; |
1646 |
case ICmpZero: OS << "ICmpZero"; break; |
| 1647 |
case Address: |
1647 |
case Address: |
| 1648 |
OS << "Address of "; |
1648 |
OS << "Address of "; |
| 1649 |
if (AccessTy.MemTy->isPointerTy()) |
1649 |
if (AccessTy.MemTy->isPointerTy()) |
| 1650 |
OS << "pointer"; // the full pointer type could be really verbose |
1650 |
OS << "pointer"; // the full pointer type could be really verbose |
| 1651 |
else { |
1651 |
else { |
| 1652 |
OS << *AccessTy.MemTy; |
1652 |
OS << *AccessTy.MemTy; |
| 1653 |
} |
1653 |
} |
| 1654 |
|
1654 |
|
| 1655 |
OS << " in addrspace(" << AccessTy.AddrSpace << ')'; |
1655 |
OS << " in addrspace(" << AccessTy.AddrSpace << ')'; |
| 1656 |
} |
1656 |
} |
| 1657 |
|
1657 |
|
| 1658 |
OS << ", Offsets={"; |
1658 |
OS << ", Offsets={"; |
| 1659 |
bool NeedComma = false; |
1659 |
bool NeedComma = false; |
| 1660 |
for (const LSRFixup &Fixup : Fixups) { |
1660 |
for (const LSRFixup &Fixup : Fixups) { |
| 1661 |
if (NeedComma) OS << ','; |
1661 |
if (NeedComma) OS << ','; |
| 1662 |
OS << Fixup.Offset; |
1662 |
OS << Fixup.Offset; |
| 1663 |
NeedComma = true; |
1663 |
NeedComma = true; |
| 1664 |
} |
1664 |
} |
| 1665 |
OS << '}'; |
1665 |
OS << '}'; |
| 1666 |
|
1666 |
|
| 1667 |
if (AllFixupsOutsideLoop) |
1667 |
if (AllFixupsOutsideLoop) |
| 1668 |
OS << ", all-fixups-outside-loop"; |
1668 |
OS << ", all-fixups-outside-loop"; |
| 1669 |
|
1669 |
|
| 1670 |
if (WidestFixupType) |
1670 |
if (WidestFixupType) |
| 1671 |
OS << ", widest fixup type: " << *WidestFixupType; |
1671 |
OS << ", widest fixup type: " << *WidestFixupType; |
| 1672 |
} |
1672 |
} |
| 1673 |
|
1673 |
|
| 1674 |
LLVM_DUMP_METHOD void LSRUse::dump() const { |
1674 |
LLVM_DUMP_METHOD void LSRUse::dump() const { |
| 1675 |
print(errs()); errs() << '\n'; |
1675 |
print(errs()); errs() << '\n'; |
| 1676 |
} |
1676 |
} |
| 1677 |
#endif |
1677 |
#endif |
| 1678 |
|
1678 |
|
| 1679 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
1679 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
| 1680 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
1680 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
| 1681 |
GlobalValue *BaseGV, int64_t BaseOffset, |
1681 |
GlobalValue *BaseGV, int64_t BaseOffset, |
| 1682 |
bool HasBaseReg, int64_t Scale, |
1682 |
bool HasBaseReg, int64_t Scale, |
| 1683 |
Instruction *Fixup/*= nullptr*/) { |
1683 |
Instruction *Fixup/*= nullptr*/) { |
| 1684 |
switch (Kind) { |
1684 |
switch (Kind) { |
| 1685 |
case LSRUse::Address: |
1685 |
case LSRUse::Address: |
| 1686 |
return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset, |
1686 |
return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset, |
| 1687 |
HasBaseReg, Scale, AccessTy.AddrSpace, Fixup); |
1687 |
HasBaseReg, Scale, AccessTy.AddrSpace, Fixup); |
| 1688 |
|
1688 |
|
| 1689 |
case LSRUse::ICmpZero: |
1689 |
case LSRUse::ICmpZero: |
| 1690 |
// There's not even a target hook for querying whether it would be legal to |
1690 |
// There's not even a target hook for querying whether it would be legal to |
| 1691 |
// fold a GV into an ICmp. |
1691 |
// fold a GV into an ICmp. |
| 1692 |
if (BaseGV) |
1692 |
if (BaseGV) |
| 1693 |
return false; |
1693 |
return false; |
| 1694 |
|
1694 |
|
| 1695 |
// ICmp only has two operands; don't allow more than two non-trivial parts. |
1695 |
// ICmp only has two operands; don't allow more than two non-trivial parts. |
| 1696 |
if (Scale != 0 && HasBaseReg && BaseOffset != 0) |
1696 |
if (Scale != 0 && HasBaseReg && BaseOffset != 0) |
| 1697 |
return false; |
1697 |
return false; |
| 1698 |
|
1698 |
|
| 1699 |
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by |
1699 |
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by |
| 1700 |
// putting the scaled register in the other operand of the icmp. |
1700 |
// putting the scaled register in the other operand of the icmp. |
| 1701 |
if (Scale != 0 && Scale != -1) |
1701 |
if (Scale != 0 && Scale != -1) |
| 1702 |
return false; |
1702 |
return false; |
| 1703 |
|
1703 |
|
| 1704 |
// If we have low-level target information, ask the target if it can fold an |
1704 |
// If we have low-level target information, ask the target if it can fold an |
| 1705 |
// integer immediate on an icmp. |
1705 |
// integer immediate on an icmp. |
| 1706 |
if (BaseOffset != 0) { |
1706 |
if (BaseOffset != 0) { |
| 1707 |
// We have one of: |
1707 |
// We have one of: |
| 1708 |
// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset |
1708 |
// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset |
| 1709 |
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset |
1709 |
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset |
| 1710 |
// Offs is the ICmp immediate. |
1710 |
// Offs is the ICmp immediate. |
| 1711 |
if (Scale == 0) |
1711 |
if (Scale == 0) |
| 1712 |
// The cast does the right thing with |
1712 |
// The cast does the right thing with |
| 1713 |
// std::numeric_limits::min(). |
1713 |
// std::numeric_limits::min(). |
| 1714 |
BaseOffset = -(uint64_t)BaseOffset; |
1714 |
BaseOffset = -(uint64_t)BaseOffset; |
| 1715 |
return TTI.isLegalICmpImmediate(BaseOffset); |
1715 |
return TTI.isLegalICmpImmediate(BaseOffset); |
| 1716 |
} |
1716 |
} |
| 1717 |
|
1717 |
|
| 1718 |
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg |
1718 |
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg |
| 1719 |
return true; |
1719 |
return true; |
| 1720 |
|
1720 |
|
| 1721 |
case LSRUse::Basic: |
1721 |
case LSRUse::Basic: |
| 1722 |
// Only handle single-register values. |
1722 |
// Only handle single-register values. |
| 1723 |
return !BaseGV && Scale == 0 && BaseOffset == 0; |
1723 |
return !BaseGV && Scale == 0 && BaseOffset == 0; |
| 1724 |
|
1724 |
|
| 1725 |
case LSRUse::Special: |
1725 |
case LSRUse::Special: |
| 1726 |
// Special case Basic to handle -1 scales. |
1726 |
// Special case Basic to handle -1 scales. |
| 1727 |
return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; |
1727 |
return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; |
| 1728 |
} |
1728 |
} |
| 1729 |
|
1729 |
|
| 1730 |
llvm_unreachable("Invalid LSRUse Kind!"); |
1730 |
llvm_unreachable("Invalid LSRUse Kind!"); |
| 1731 |
} |
1731 |
} |
| 1732 |
|
1732 |
|
| 1733 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
1733 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
| 1734 |
int64_t MinOffset, int64_t MaxOffset, |
1734 |
int64_t MinOffset, int64_t MaxOffset, |
| 1735 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
1735 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
| 1736 |
GlobalValue *BaseGV, int64_t BaseOffset, |
1736 |
GlobalValue *BaseGV, int64_t BaseOffset, |
| 1737 |
bool HasBaseReg, int64_t Scale) { |
1737 |
bool HasBaseReg, int64_t Scale) { |
| 1738 |
// Check for overflow. |
1738 |
// Check for overflow. |
| 1739 |
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != |
1739 |
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != |
| 1740 |
(MinOffset > 0)) |
1740 |
(MinOffset > 0)) |
| 1741 |
return false; |
1741 |
return false; |
| 1742 |
MinOffset = (uint64_t)BaseOffset + MinOffset; |
1742 |
MinOffset = (uint64_t)BaseOffset + MinOffset; |
| 1743 |
if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) != |
1743 |
if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) != |
| 1744 |
(MaxOffset > 0)) |
1744 |
(MaxOffset > 0)) |
| 1745 |
return false; |
1745 |
return false; |
| 1746 |
MaxOffset = (uint64_t)BaseOffset + MaxOffset; |
1746 |
MaxOffset = (uint64_t)BaseOffset + MaxOffset; |
| 1747 |
|
1747 |
|
| 1748 |
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, |
1748 |
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, |
| 1749 |
HasBaseReg, Scale) && |
1749 |
HasBaseReg, Scale) && |
| 1750 |
isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, |
1750 |
isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, |
| 1751 |
HasBaseReg, Scale); |
1751 |
HasBaseReg, Scale); |
| 1752 |
} |
1752 |
} |
| 1753 |
|
1753 |
|
| 1754 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
1754 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
| 1755 |
int64_t MinOffset, int64_t MaxOffset, |
1755 |
int64_t MinOffset, int64_t MaxOffset, |
| 1756 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
1756 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
| 1757 |
const Formula &F, const Loop &L) { |
1757 |
const Formula &F, const Loop &L) { |
| 1758 |
// For the purpose of isAMCompletelyFolded either having a canonical formula |
1758 |
// For the purpose of isAMCompletelyFolded either having a canonical formula |
| 1759 |
// or a scale not equal to zero is correct. |
1759 |
// or a scale not equal to zero is correct. |
| 1760 |
// Problems may arise from non canonical formulae having a scale == 0. |
1760 |
// Problems may arise from non canonical formulae having a scale == 0. |
| 1761 |
// Strictly speaking it would best to just rely on canonical formulae. |
1761 |
// Strictly speaking it would best to just rely on canonical formulae. |
| 1762 |
// However, when we generate the scaled formulae, we first check that the |
1762 |
// However, when we generate the scaled formulae, we first check that the |
| 1763 |
// scaling factor is profitable before computing the actual ScaledReg for |
1763 |
// scaling factor is profitable before computing the actual ScaledReg for |
| 1764 |
// compile time sake. |
1764 |
// compile time sake. |
| 1765 |
assert((F.isCanonical(L) || F.Scale != 0)); |
1765 |
assert((F.isCanonical(L) || F.Scale != 0)); |
| 1766 |
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, |
1766 |
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, |
| 1767 |
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); |
1767 |
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); |
| 1768 |
} |
1768 |
} |
| 1769 |
|
1769 |
|
| 1770 |
/// Test whether we know how to expand the current formula. |
1770 |
/// Test whether we know how to expand the current formula. |
| 1771 |
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, |
1771 |
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, |
| 1772 |
int64_t MaxOffset, LSRUse::KindType Kind, |
1772 |
int64_t MaxOffset, LSRUse::KindType Kind, |
| 1773 |
MemAccessTy AccessTy, GlobalValue *BaseGV, |
1773 |
MemAccessTy AccessTy, GlobalValue *BaseGV, |
| 1774 |
int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { |
1774 |
int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { |
| 1775 |
// We know how to expand completely foldable formulae. |
1775 |
// We know how to expand completely foldable formulae. |
| 1776 |
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, |
1776 |
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, |
| 1777 |
BaseOffset, HasBaseReg, Scale) || |
1777 |
BaseOffset, HasBaseReg, Scale) || |
| 1778 |
// Or formulae that use a base register produced by a sum of base |
1778 |
// Or formulae that use a base register produced by a sum of base |
| 1779 |
// registers. |
1779 |
// registers. |
| 1780 |
(Scale == 1 && |
1780 |
(Scale == 1 && |
| 1781 |
isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, |
1781 |
isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, |
| 1782 |
BaseGV, BaseOffset, true, 0)); |
1782 |
BaseGV, BaseOffset, true, 0)); |
| 1783 |
} |
1783 |
} |
| 1784 |
|
1784 |
|
| 1785 |
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, |
1785 |
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, |
| 1786 |
int64_t MaxOffset, LSRUse::KindType Kind, |
1786 |
int64_t MaxOffset, LSRUse::KindType Kind, |
| 1787 |
MemAccessTy AccessTy, const Formula &F) { |
1787 |
MemAccessTy AccessTy, const Formula &F) { |
| 1788 |
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, |
1788 |
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, |
| 1789 |
F.BaseOffset, F.HasBaseReg, F.Scale); |
1789 |
F.BaseOffset, F.HasBaseReg, F.Scale); |
| 1790 |
} |
1790 |
} |
| 1791 |
|
1791 |
|
| 1792 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
1792 |
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, |
| 1793 |
const LSRUse &LU, const Formula &F) { |
1793 |
const LSRUse &LU, const Formula &F) { |
| 1794 |
// Target may want to look at the user instructions. |
1794 |
// Target may want to look at the user instructions. |
| 1795 |
if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) { |
1795 |
if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) { |
| 1796 |
for (const LSRFixup &Fixup : LU.Fixups) |
1796 |
for (const LSRFixup &Fixup : LU.Fixups) |
| 1797 |
if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, |
1797 |
if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, |
| 1798 |
(F.BaseOffset + Fixup.Offset), F.HasBaseReg, |
1798 |
(F.BaseOffset + Fixup.Offset), F.HasBaseReg, |
| 1799 |
F.Scale, Fixup.UserInst)) |
1799 |
F.Scale, Fixup.UserInst)) |
| 1800 |
return false; |
1800 |
return false; |
| 1801 |
return true; |
1801 |
return true; |
| 1802 |
} |
1802 |
} |
| 1803 |
|
1803 |
|
| 1804 |
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, |
1804 |
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, |
| 1805 |
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, |
1805 |
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, |
| 1806 |
F.Scale); |
1806 |
F.Scale); |
| 1807 |
} |
1807 |
} |
| 1808 |
|
1808 |
|
| 1809 |
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, |
1809 |
static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, |
| 1810 |
const LSRUse &LU, const Formula &F, |
1810 |
const LSRUse &LU, const Formula &F, |
| 1811 |
const Loop &L) { |
1811 |
const Loop &L) { |
| 1812 |
if (!F.Scale) |
1812 |
if (!F.Scale) |
| 1813 |
return 0; |
1813 |
return 0; |
| 1814 |
|
1814 |
|
| 1815 |
// If the use is not completely folded in that instruction, we will have to |
1815 |
// If the use is not completely folded in that instruction, we will have to |
| 1816 |
// pay an extra cost only for scale != 1. |
1816 |
// pay an extra cost only for scale != 1. |
| 1817 |
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, |
1817 |
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, |
| 1818 |
LU.AccessTy, F, L)) |
1818 |
LU.AccessTy, F, L)) |
| 1819 |
return F.Scale != 1; |
1819 |
return F.Scale != 1; |
| 1820 |
|
1820 |
|
| 1821 |
switch (LU.Kind) { |
1821 |
switch (LU.Kind) { |
| 1822 |
case LSRUse::Address: { |
1822 |
case LSRUse::Address: { |
| 1823 |
// Check the scaling factor cost with both the min and max offsets. |
1823 |
// Check the scaling factor cost with both the min and max offsets. |
| 1824 |
InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost( |
1824 |
InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost( |
| 1825 |
LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg, |
1825 |
LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg, |
| 1826 |
F.Scale, LU.AccessTy.AddrSpace); |
1826 |
F.Scale, LU.AccessTy.AddrSpace); |
| 1827 |
InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost( |
1827 |
InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost( |
| 1828 |
LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg, |
1828 |
LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg, |
| 1829 |
F.Scale, LU.AccessTy.AddrSpace); |
1829 |
F.Scale, LU.AccessTy.AddrSpace); |
| 1830 |
|
1830 |
|
| 1831 |
assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() && |
1831 |
assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() && |
| 1832 |
"Legal addressing mode has an illegal cost!"); |
1832 |
"Legal addressing mode has an illegal cost!"); |
| 1833 |
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); |
1833 |
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); |
| 1834 |
} |
1834 |
} |
| 1835 |
case LSRUse::ICmpZero: |
1835 |
case LSRUse::ICmpZero: |
| 1836 |
case LSRUse::Basic: |
1836 |
case LSRUse::Basic: |
| 1837 |
case LSRUse::Special: |
1837 |
case LSRUse::Special: |
| 1838 |
// The use is completely folded, i.e., everything is folded into the |
1838 |
// The use is completely folded, i.e., everything is folded into the |
| 1839 |
// instruction. |
1839 |
// instruction. |
| 1840 |
return 0; |
1840 |
return 0; |
| 1841 |
} |
1841 |
} |
| 1842 |
|
1842 |
|
| 1843 |
llvm_unreachable("Invalid LSRUse Kind!"); |
1843 |
llvm_unreachable("Invalid LSRUse Kind!"); |
| 1844 |
} |
1844 |
} |
| 1845 |
|
1845 |
|
| 1846 |
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, |
1846 |
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, |
| 1847 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
1847 |
LSRUse::KindType Kind, MemAccessTy AccessTy, |
| 1848 |
GlobalValue *BaseGV, int64_t BaseOffset, |
1848 |
GlobalValue *BaseGV, int64_t BaseOffset, |
| 1849 |
bool HasBaseReg) { |
1849 |
bool HasBaseReg) { |
| 1850 |
// Fast-path: zero is always foldable. |
1850 |
// Fast-path: zero is always foldable. |
| 1851 |
if (BaseOffset == 0 && !BaseGV) return true; |
1851 |
if (BaseOffset == 0 && !BaseGV) return true; |
| 1852 |
|
1852 |
|
| 1853 |
// Conservatively, create an address with an immediate and a |
1853 |
// Conservatively, create an address with an immediate and a |
| 1854 |
// base and a scale. |
1854 |
// base and a scale. |
| 1855 |
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; |
1855 |
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; |
| 1856 |
|
1856 |
|
| 1857 |
// Canonicalize a scale of 1 to a base register if the formula doesn't |
1857 |
// Canonicalize a scale of 1 to a base register if the formula doesn't |
| 1858 |
// already have a base register. |
1858 |
// already have a base register. |
| 1859 |
if (!HasBaseReg && Scale == 1) { |
1859 |
if (!HasBaseReg && Scale == 1) { |
| 1860 |
Scale = 0; |
1860 |
Scale = 0; |
| 1861 |
HasBaseReg = true; |
1861 |
HasBaseReg = true; |
| 1862 |
} |
1862 |
} |
| 1863 |
|
1863 |
|
| 1864 |
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, |
1864 |
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, |
| 1865 |
HasBaseReg, Scale); |
1865 |
HasBaseReg, Scale); |
| 1866 |
} |
1866 |
} |
| 1867 |
|
1867 |
|
| 1868 |
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, |
1868 |
static bool isAlwaysFoldable(const TargetTransformInfo &TTI, |
| 1869 |
ScalarEvolution &SE, int64_t MinOffset, |
1869 |
ScalarEvolution &SE, int64_t MinOffset, |
| 1870 |
int64_t MaxOffset, LSRUse::KindType Kind, |
1870 |
int64_t MaxOffset, LSRUse::KindType Kind, |
| 1871 |
MemAccessTy AccessTy, const SCEV *S, |
1871 |
MemAccessTy AccessTy, const SCEV *S, |
| 1872 |
bool HasBaseReg) { |
1872 |
bool HasBaseReg) { |
| 1873 |
// Fast-path: zero is always foldable. |
1873 |
// Fast-path: zero is always foldable. |
| 1874 |
if (S->isZero()) return true; |
1874 |
if (S->isZero()) return true; |
| 1875 |
|
1875 |
|
| 1876 |
// Conservatively, create an address with an immediate and a |
1876 |
// Conservatively, create an address with an immediate and a |
| 1877 |
// base and a scale. |
1877 |
// base and a scale. |
| 1878 |
int64_t BaseOffset = ExtractImmediate(S, SE); |
1878 |
int64_t BaseOffset = ExtractImmediate(S, SE); |
| 1879 |
GlobalValue *BaseGV = ExtractSymbol(S, SE); |
1879 |
GlobalValue *BaseGV = ExtractSymbol(S, SE); |
| 1880 |
|
1880 |
|
| 1881 |
// If there's anything else involved, it's not foldable. |
1881 |
// If there's anything else involved, it's not foldable. |
| 1882 |
if (!S->isZero()) return false; |
1882 |
if (!S->isZero()) return false; |
| 1883 |
|
1883 |
|
| 1884 |
// Fast-path: zero is always foldable. |
1884 |
// Fast-path: zero is always foldable. |
| 1885 |
if (BaseOffset == 0 && !BaseGV) return true; |
1885 |
if (BaseOffset == 0 && !BaseGV) return true; |
| 1886 |
|
1886 |
|
| 1887 |
// Conservatively, create an address with an immediate and a |
1887 |
// Conservatively, create an address with an immediate and a |
| 1888 |
// base and a scale. |
1888 |
// base and a scale. |
| 1889 |
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; |
1889 |
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; |
| 1890 |
|
1890 |
|
| 1891 |
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, |
1891 |
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, |
| 1892 |
BaseOffset, HasBaseReg, Scale); |
1892 |
BaseOffset, HasBaseReg, Scale); |
| 1893 |
} |
1893 |
} |
| 1894 |
|
1894 |
|
| 1895 |
namespace { |
1895 |
namespace { |
| 1896 |
|
1896 |
|
| 1897 |
/// An individual increment in a Chain of IV increments. Relate an IV user to |
1897 |
/// An individual increment in a Chain of IV increments. Relate an IV user to |
| 1898 |
/// an expression that computes the IV it uses from the IV used by the previous |
1898 |
/// an expression that computes the IV it uses from the IV used by the previous |
| 1899 |
/// link in the Chain. |
1899 |
/// link in the Chain. |
| 1900 |
/// |
1900 |
/// |
| 1901 |
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the |
1901 |
/// For the head of a chain, IncExpr holds the absolute SCEV expression for the |
| 1902 |
/// original IVOperand. The head of the chain's IVOperand is only valid during |
1902 |
/// original IVOperand. The head of the chain's IVOperand is only valid during |
| 1903 |
/// chain collection, before LSR replaces IV users. During chain generation, |
1903 |
/// chain collection, before LSR replaces IV users. During chain generation, |
| 1904 |
/// IncExpr can be used to find the new IVOperand that computes the same |
1904 |
/// IncExpr can be used to find the new IVOperand that computes the same |
| 1905 |
/// expression. |
1905 |
/// expression. |
| 1906 |
struct IVInc { |
1906 |
struct IVInc { |
| 1907 |
Instruction *UserInst; |
1907 |
Instruction *UserInst; |
| 1908 |
Value* IVOperand; |
1908 |
Value* IVOperand; |
| 1909 |
const SCEV *IncExpr; |
1909 |
const SCEV *IncExpr; |
| 1910 |
|
1910 |
|
| 1911 |
IVInc(Instruction *U, Value *O, const SCEV *E) |
1911 |
IVInc(Instruction *U, Value *O, const SCEV *E) |
| 1912 |
: UserInst(U), IVOperand(O), IncExpr(E) {} |
1912 |
: UserInst(U), IVOperand(O), IncExpr(E) {} |
| 1913 |
}; |
1913 |
}; |
| 1914 |
|
1914 |
|
| 1915 |
// The list of IV increments in program order. We typically add the head of a |
1915 |
// The list of IV increments in program order. We typically add the head of a |
| 1916 |
// chain without finding subsequent links. |
1916 |
// chain without finding subsequent links. |
| 1917 |
struct IVChain { |
1917 |
struct IVChain { |
| 1918 |
SmallVector Incs; |
1918 |
SmallVector Incs; |
| 1919 |
const SCEV *ExprBase = nullptr; |
1919 |
const SCEV *ExprBase = nullptr; |
| 1920 |
|
1920 |
|
| 1921 |
IVChain() = default; |
1921 |
IVChain() = default; |
| 1922 |
IVChain(const IVInc &Head, const SCEV *Base) |
1922 |
IVChain(const IVInc &Head, const SCEV *Base) |
| 1923 |
: Incs(1, Head), ExprBase(Base) {} |
1923 |
: Incs(1, Head), ExprBase(Base) {} |
| 1924 |
|
1924 |
|
| 1925 |
using const_iterator = SmallVectorImpl::const_iterator; |
1925 |
using const_iterator = SmallVectorImpl::const_iterator; |
| 1926 |
|
1926 |
|
| 1927 |
// Return the first increment in the chain. |
1927 |
// Return the first increment in the chain. |
| 1928 |
const_iterator begin() const { |
1928 |
const_iterator begin() const { |
| 1929 |
assert(!Incs.empty()); |
1929 |
assert(!Incs.empty()); |
| 1930 |
return std::next(Incs.begin()); |
1930 |
return std::next(Incs.begin()); |
| 1931 |
} |
1931 |
} |
| 1932 |
const_iterator end() const { |
1932 |
const_iterator end() const { |
| 1933 |
return Incs.end(); |
1933 |
return Incs.end(); |
| 1934 |
} |
1934 |
} |
| 1935 |
|
1935 |
|
| 1936 |
// Returns true if this chain contains any increments. |
1936 |
// Returns true if this chain contains any increments. |
| 1937 |
bool hasIncs() const { return Incs.size() >= 2; } |
1937 |
bool hasIncs() const { return Incs.size() >= 2; } |
| 1938 |
|
1938 |
|
| 1939 |
// Add an IVInc to the end of this chain. |
1939 |
// Add an IVInc to the end of this chain. |
| 1940 |
void add(const IVInc &X) { Incs.push_back(X); } |
1940 |
void add(const IVInc &X) { Incs.push_back(X); } |
| 1941 |
|
1941 |
|
| 1942 |
// Returns the last UserInst in the chain. |
1942 |
// Returns the last UserInst in the chain. |
| 1943 |
Instruction *tailUserInst() const { return Incs.back().UserInst; } |
1943 |
Instruction *tailUserInst() const { return Incs.back().UserInst; } |
| 1944 |
|
1944 |
|
| 1945 |
// Returns true if IncExpr can be profitably added to this chain. |
1945 |
// Returns true if IncExpr can be profitably added to this chain. |
| 1946 |
bool isProfitableIncrement(const SCEV *OperExpr, |
1946 |
bool isProfitableIncrement(const SCEV *OperExpr, |
| 1947 |
const SCEV *IncExpr, |
1947 |
const SCEV *IncExpr, |
| 1948 |
ScalarEvolution&); |
1948 |
ScalarEvolution&); |
| 1949 |
}; |
1949 |
}; |
| 1950 |
|
1950 |
|
| 1951 |
/// Helper for CollectChains to track multiple IV increment uses. Distinguish |
1951 |
/// Helper for CollectChains to track multiple IV increment uses. Distinguish |
| 1952 |
/// between FarUsers that definitely cross IV increments and NearUsers that may |
1952 |
/// between FarUsers that definitely cross IV increments and NearUsers that may |
| 1953 |
/// be used between IV increments. |
1953 |
/// be used between IV increments. |
| 1954 |
struct ChainUsers { |
1954 |
struct ChainUsers { |
| 1955 |
SmallPtrSet FarUsers; |
1955 |
SmallPtrSet FarUsers; |
| 1956 |
SmallPtrSet NearUsers; |
1956 |
SmallPtrSet NearUsers; |
| 1957 |
}; |
1957 |
}; |
| 1958 |
|
1958 |
|
| 1959 |
/// This class holds state for the main loop strength reduction logic. |
1959 |
/// This class holds state for the main loop strength reduction logic. |
| 1960 |
class LSRInstance { |
1960 |
class LSRInstance { |
| 1961 |
IVUsers &IU; |
1961 |
IVUsers &IU; |
| 1962 |
ScalarEvolution &SE; |
1962 |
ScalarEvolution &SE; |
| 1963 |
DominatorTree &DT; |
1963 |
DominatorTree &DT; |
| 1964 |
LoopInfo &LI; |
1964 |
LoopInfo &LI; |
| 1965 |
AssumptionCache &AC; |
1965 |
AssumptionCache &AC; |
| 1966 |
TargetLibraryInfo &TLI; |
1966 |
TargetLibraryInfo &TLI; |
| 1967 |
const TargetTransformInfo &TTI; |
1967 |
const TargetTransformInfo &TTI; |
| 1968 |
Loop *const L; |
1968 |
Loop *const L; |
| 1969 |
MemorySSAUpdater *MSSAU; |
1969 |
MemorySSAUpdater *MSSAU; |
| 1970 |
TTI::AddressingModeKind AMK; |
1970 |
TTI::AddressingModeKind AMK; |
| 1971 |
mutable SCEVExpander Rewriter; |
1971 |
mutable SCEVExpander Rewriter; |
| 1972 |
bool Changed = false; |
1972 |
bool Changed = false; |
| 1973 |
|
1973 |
|
| 1974 |
/// This is the insert position that the current loop's induction variable |
1974 |
/// This is the insert position that the current loop's induction variable |
| 1975 |
/// increment should be placed. In simple loops, this is the latch block's |
1975 |
/// increment should be placed. In simple loops, this is the latch block's |
| 1976 |
/// terminator. But in more complicated cases, this is a position which will |
1976 |
/// terminator. But in more complicated cases, this is a position which will |
| 1977 |
/// dominate all the in-loop post-increment users. |
1977 |
/// dominate all the in-loop post-increment users. |
| 1978 |
Instruction *IVIncInsertPos = nullptr; |
1978 |
Instruction *IVIncInsertPos = nullptr; |
| 1979 |
|
1979 |
|
| 1980 |
/// Interesting factors between use strides. |
1980 |
/// Interesting factors between use strides. |
| 1981 |
/// |
1981 |
/// |
| 1982 |
/// We explicitly use a SetVector which contains a SmallSet, instead of the |
1982 |
/// We explicitly use a SetVector which contains a SmallSet, instead of the |
| 1983 |
/// default, a SmallDenseSet, because we need to use the full range of |
1983 |
/// default, a SmallDenseSet, because we need to use the full range of |
| 1984 |
/// int64_ts, and there's currently no good way of doing that with |
1984 |
/// int64_ts, and there's currently no good way of doing that with |
| 1985 |
/// SmallDenseSet. |
1985 |
/// SmallDenseSet. |
| 1986 |
SetVector, SmallSet> Factors; |
1986 |
SetVector, SmallSet> Factors; |
| 1987 |
|
1987 |
|
| 1988 |
/// The cost of the current SCEV, the best solution by LSR will be dropped if |
1988 |
/// The cost of the current SCEV, the best solution by LSR will be dropped if |
| 1989 |
/// the solution is not profitable. |
1989 |
/// the solution is not profitable. |
| 1990 |
Cost BaselineCost; |
1990 |
Cost BaselineCost; |
| 1991 |
|
1991 |
|
| 1992 |
/// Interesting use types, to facilitate truncation reuse. |
1992 |
/// Interesting use types, to facilitate truncation reuse. |
| 1993 |
SmallSetVector Types; |
1993 |
SmallSetVector Types; |
| 1994 |
|
1994 |
|
| 1995 |
/// The list of interesting uses. |
1995 |
/// The list of interesting uses. |
| 1996 |
mutable SmallVector Uses; |
1996 |
mutable SmallVector Uses; |
| 1997 |
|
1997 |
|
| 1998 |
/// Track which uses use which register candidates. |
1998 |
/// Track which uses use which register candidates. |
| 1999 |
RegUseTracker RegUses; |
1999 |
RegUseTracker RegUses; |
| 2000 |
|
2000 |
|
| 2001 |
// Limit the number of chains to avoid quadratic behavior. We don't expect to |
2001 |
// Limit the number of chains to avoid quadratic behavior. We don't expect to |
| 2002 |
// have more than a few IV increment chains in a loop. Missing a Chain falls |
2002 |
// have more than a few IV increment chains in a loop. Missing a Chain falls |
| 2003 |
// back to normal LSR behavior for those uses. |
2003 |
// back to normal LSR behavior for those uses. |
| 2004 |
static const unsigned MaxChains = 8; |
2004 |
static const unsigned MaxChains = 8; |
| 2005 |
|
2005 |
|
| 2006 |
/// IV users can form a chain of IV increments. |
2006 |
/// IV users can form a chain of IV increments. |
| 2007 |
SmallVector IVChainVec; |
2007 |
SmallVector IVChainVec; |
| 2008 |
|
2008 |
|
| 2009 |
/// IV users that belong to profitable IVChains. |
2009 |
/// IV users that belong to profitable IVChains. |
| 2010 |
SmallPtrSet |
2010 |
SmallPtrSet |
| 2011 |
|
2011 |
|
| 2012 |
/// Induction variables that were generated and inserted by the SCEV Expander. |
2012 |
/// Induction variables that were generated and inserted by the SCEV Expander. |
| 2013 |
SmallVector ScalarEvolutionIVs; |
2013 |
SmallVector ScalarEvolutionIVs; |
| 2014 |
|
2014 |
|
| 2015 |
void OptimizeShadowIV(); |
2015 |
void OptimizeShadowIV(); |
| 2016 |
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); |
2016 |
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); |
| 2017 |
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); |
2017 |
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); |
| 2018 |
void OptimizeLoopTermCond(); |
2018 |
void OptimizeLoopTermCond(); |
| 2019 |
|
2019 |
|
| 2020 |
void ChainInstruction(Instruction *UserInst, Instruction *IVOper, |
2020 |
void ChainInstruction(Instruction *UserInst, Instruction *IVOper, |
| 2021 |
SmallVectorImpl &ChainUsersVec); |
2021 |
SmallVectorImpl &ChainUsersVec); |
| 2022 |
void FinalizeChain(IVChain &Chain); |
2022 |
void FinalizeChain(IVChain &Chain); |
| 2023 |
void CollectChains(); |
2023 |
void CollectChains(); |
| 2024 |
void GenerateIVChain(const IVChain &Chain, |
2024 |
void GenerateIVChain(const IVChain &Chain, |
| 2025 |
SmallVectorImpl &DeadInsts); |
2025 |
SmallVectorImpl &DeadInsts); |
| 2026 |
|
2026 |
|
| 2027 |
void CollectInterestingTypesAndFactors(); |
2027 |
void CollectInterestingTypesAndFactors(); |
| 2028 |
void CollectFixupsAndInitialFormulae(); |
2028 |
void CollectFixupsAndInitialFormulae(); |
| 2029 |
|
2029 |
|
| 2030 |
// Support for sharing of LSRUses between LSRFixups. |
2030 |
// Support for sharing of LSRUses between LSRFixups. |
| 2031 |
using UseMapTy = DenseMap; |
2031 |
using UseMapTy = DenseMap; |
| 2032 |
UseMapTy UseMap; |
2032 |
UseMapTy UseMap; |
| 2033 |
|
2033 |
|
| 2034 |
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, |
2034 |
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, |
| 2035 |
LSRUse::KindType Kind, MemAccessTy AccessTy); |
2035 |
LSRUse::KindType Kind, MemAccessTy AccessTy); |
| 2036 |
|
2036 |
|
| 2037 |
std::pair getUse(const SCEV *&Expr, LSRUse::KindType Kind, |
2037 |
std::pair getUse(const SCEV *&Expr, LSRUse::KindType Kind, |
| 2038 |
MemAccessTy AccessTy); |
2038 |
MemAccessTy AccessTy); |
| 2039 |
|
2039 |
|
| 2040 |
void DeleteUse(LSRUse &LU, size_t LUIdx); |
2040 |
void DeleteUse(LSRUse &LU, size_t LUIdx); |
| 2041 |
|
2041 |
|
| 2042 |
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); |
2042 |
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); |
| 2043 |
|
2043 |
|
| 2044 |
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); |
2044 |
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); |
| 2045 |
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); |
2045 |
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); |
| 2046 |
void CountRegisters(const Formula &F, size_t LUIdx); |
2046 |
void CountRegisters(const Formula &F, size_t LUIdx); |
| 2047 |
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F); |
2047 |
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F); |
| 2048 |
|
2048 |
|
| 2049 |
void CollectLoopInvariantFixupsAndFormulae(); |
2049 |
void CollectLoopInvariantFixupsAndFormulae(); |
| 2050 |
|
2050 |
|
| 2051 |
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, |
2051 |
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, |
| 2052 |
unsigned Depth = 0); |
2052 |
unsigned Depth = 0); |
| 2053 |
|
2053 |
|
| 2054 |
void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, |
2054 |
void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, |
| 2055 |
const Formula &Base, unsigned Depth, |
2055 |
const Formula &Base, unsigned Depth, |
| 2056 |
size_t Idx, bool IsScaledReg = false); |
2056 |
size_t Idx, bool IsScaledReg = false); |
| 2057 |
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); |
2057 |
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); |
| 2058 |
void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, |
2058 |
void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, |
| 2059 |
const Formula &Base, size_t Idx, |
2059 |
const Formula &Base, size_t Idx, |
| 2060 |
bool IsScaledReg = false); |
2060 |
bool IsScaledReg = false); |
| 2061 |
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); |
2061 |
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); |
| 2062 |
void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, |
2062 |
void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, |
| 2063 |
const Formula &Base, |
2063 |
const Formula &Base, |
| 2064 |
const SmallVectorImpl &Worklist, |
2064 |
const SmallVectorImpl &Worklist, |
| 2065 |
size_t Idx, bool IsScaledReg = false); |
2065 |
size_t Idx, bool IsScaledReg = false); |
| 2066 |
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); |
2066 |
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); |
| 2067 |
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); |
2067 |
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); |
| 2068 |
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); |
2068 |
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); |
| 2069 |
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); |
2069 |
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); |
| 2070 |
void GenerateCrossUseConstantOffsets(); |
2070 |
void GenerateCrossUseConstantOffsets(); |
| 2071 |
void GenerateAllReuseFormulae(); |
2071 |
void GenerateAllReuseFormulae(); |
| 2072 |
|
2072 |
|
| 2073 |
void FilterOutUndesirableDedicatedRegisters(); |
2073 |
void FilterOutUndesirableDedicatedRegisters(); |
| 2074 |
|
2074 |
|
| 2075 |
size_t EstimateSearchSpaceComplexity() const; |
2075 |
size_t EstimateSearchSpaceComplexity() const; |
| 2076 |
void NarrowSearchSpaceByDetectingSupersets(); |
2076 |
void NarrowSearchSpaceByDetectingSupersets(); |
| 2077 |
void NarrowSearchSpaceByCollapsingUnrolledCode(); |
2077 |
void NarrowSearchSpaceByCollapsingUnrolledCode(); |
| 2078 |
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); |
2078 |
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); |
| 2079 |
void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); |
2079 |
void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); |
| 2080 |
void NarrowSearchSpaceByFilterPostInc(); |
2080 |
void NarrowSearchSpaceByFilterPostInc(); |
| 2081 |
void NarrowSearchSpaceByDeletingCostlyFormulas(); |
2081 |
void NarrowSearchSpaceByDeletingCostlyFormulas(); |
| 2082 |
void NarrowSearchSpaceByPickingWinnerRegs(); |
2082 |
void NarrowSearchSpaceByPickingWinnerRegs(); |
| 2083 |
void NarrowSearchSpaceUsingHeuristics(); |
2083 |
void NarrowSearchSpaceUsingHeuristics(); |
| 2084 |
|
2084 |
|
| 2085 |
void SolveRecurse(SmallVectorImpl &Solution, |
2085 |
void SolveRecurse(SmallVectorImpl &Solution, |
| 2086 |
Cost &SolutionCost, |
2086 |
Cost &SolutionCost, |
| 2087 |
SmallVectorImpl &Workspace, |
2087 |
SmallVectorImpl &Workspace, |
| 2088 |
const Cost &CurCost, |
2088 |
const Cost &CurCost, |
| 2089 |
const SmallPtrSet &CurRegs, |
2089 |
const SmallPtrSet &CurRegs, |
| 2090 |
DenseSet &VisitedRegs) const; |
2090 |
DenseSet &VisitedRegs) const; |
| 2091 |
void Solve(SmallVectorImpl &Solution) const; |
2091 |
void Solve(SmallVectorImpl &Solution) const; |
| 2092 |
|
2092 |
|
| 2093 |
BasicBlock::iterator |
2093 |
BasicBlock::iterator |
| 2094 |
HoistInsertPosition(BasicBlock::iterator IP, |
2094 |
HoistInsertPosition(BasicBlock::iterator IP, |
| 2095 |
const SmallVectorImpl &Inputs) const; |
2095 |
const SmallVectorImpl &Inputs) const; |
| 2096 |
BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, |
2096 |
BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, |
| 2097 |
const LSRFixup &LF, |
2097 |
const LSRFixup &LF, |
| 2098 |
const LSRUse &LU) const; |
2098 |
const LSRUse &LU) const; |
| 2099 |
|
2099 |
|
| 2100 |
Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, |
2100 |
Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, |
| 2101 |
BasicBlock::iterator IP, |
2101 |
BasicBlock::iterator IP, |
| 2102 |
SmallVectorImpl &DeadInsts) const; |
2102 |
SmallVectorImpl &DeadInsts) const; |
| 2103 |
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, |
2103 |
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, |
| 2104 |
const Formula &F, |
2104 |
const Formula &F, |
| 2105 |
SmallVectorImpl &DeadInsts) const; |
2105 |
SmallVectorImpl &DeadInsts) const; |
| 2106 |
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, |
2106 |
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, |
| 2107 |
SmallVectorImpl &DeadInsts) const; |
2107 |
SmallVectorImpl &DeadInsts) const; |
| 2108 |
void ImplementSolution(const SmallVectorImpl &Solution); |
2108 |
void ImplementSolution(const SmallVectorImpl &Solution); |
| 2109 |
|
2109 |
|
| 2110 |
public: |
2110 |
public: |
| 2111 |
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, |
2111 |
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, |
| 2112 |
LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, |
2112 |
LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, |
| 2113 |
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU); |
2113 |
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU); |
| 2114 |
|
2114 |
|
| 2115 |
bool getChanged() const { return Changed; } |
2115 |
bool getChanged() const { return Changed; } |
| 2116 |
const SmallVectorImpl &getScalarEvolutionIVs() const { |
2116 |
const SmallVectorImpl &getScalarEvolutionIVs() const { |
| 2117 |
return ScalarEvolutionIVs; |
2117 |
return ScalarEvolutionIVs; |
| 2118 |
} |
2118 |
} |
| 2119 |
|
2119 |
|
| 2120 |
void print_factors_and_types(raw_ostream &OS) const; |
2120 |
void print_factors_and_types(raw_ostream &OS) const; |
| 2121 |
void print_fixups(raw_ostream &OS) const; |
2121 |
void print_fixups(raw_ostream &OS) const; |
| 2122 |
void print_uses(raw_ostream &OS) const; |
2122 |
void print_uses(raw_ostream &OS) const; |
| 2123 |
void print(raw_ostream &OS) const; |
2123 |
void print(raw_ostream &OS) const; |
| 2124 |
void dump() const; |
2124 |
void dump() const; |
| 2125 |
}; |
2125 |
}; |
| 2126 |
|
2126 |
|
| 2127 |
} // end anonymous namespace |
2127 |
} // end anonymous namespace |
| 2128 |
|
2128 |
|
| 2129 |
/// If IV is used in a int-to-float cast inside the loop then try to eliminate |
2129 |
/// If IV is used in a int-to-float cast inside the loop then try to eliminate |
| 2130 |
/// the cast operation. |
2130 |
/// the cast operation. |
| 2131 |
void LSRInstance::OptimizeShadowIV() { |
2131 |
void LSRInstance::OptimizeShadowIV() { |
| 2132 |
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); |
2132 |
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); |
| 2133 |
if (isa(BackedgeTakenCount)) |
2133 |
if (isa(BackedgeTakenCount)) |
| 2134 |
return; |
2134 |
return; |
| 2135 |
|
2135 |
|
| 2136 |
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); |
2136 |
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); |
| 2137 |
UI != E; /* empty */) { |
2137 |
UI != E; /* empty */) { |
| 2138 |
IVUsers::const_iterator CandidateUI = UI; |
2138 |
IVUsers::const_iterator CandidateUI = UI; |
| 2139 |
++UI; |
2139 |
++UI; |
| 2140 |
Instruction *ShadowUse = CandidateUI->getUser(); |
2140 |
Instruction *ShadowUse = CandidateUI->getUser(); |
| 2141 |
Type *DestTy = nullptr; |
2141 |
Type *DestTy = nullptr; |
| 2142 |
bool IsSigned = false; |
2142 |
bool IsSigned = false; |
| 2143 |
|
2143 |
|
| 2144 |
/* If shadow use is a int->float cast then insert a second IV |
2144 |
/* If shadow use is a int->float cast then insert a second IV |
| 2145 |
to eliminate this cast. |
2145 |
to eliminate this cast. |
| 2146 |
|
2146 |
|
| 2147 |
for (unsigned i = 0; i < n; ++i) |
2147 |
for (unsigned i = 0; i < n; ++i) |
| 2148 |
foo((double)i); |
2148 |
foo((double)i); |
| 2149 |
|
2149 |
|
| 2150 |
is transformed into |
2150 |
is transformed into |
| 2151 |
|
2151 |
|
| 2152 |
double d = 0.0; |
2152 |
double d = 0.0; |
| 2153 |
for (unsigned i = 0; i < n; ++i, ++d) |
2153 |
for (unsigned i = 0; i < n; ++i, ++d) |
| 2154 |
foo(d); |
2154 |
foo(d); |
| 2155 |
*/ |
2155 |
*/ |
| 2156 |
if (UIToFPInst *UCast = dyn_cast(CandidateUI->getUser())) { |
2156 |
if (UIToFPInst *UCast = dyn_cast(CandidateUI->getUser())) { |
| 2157 |
IsSigned = false; |
2157 |
IsSigned = false; |
| 2158 |
DestTy = UCast->getDestTy(); |
2158 |
DestTy = UCast->getDestTy(); |
| 2159 |
} |
2159 |
} |
| 2160 |
else if (SIToFPInst *SCast = dyn_cast(CandidateUI->getUser())) { |
2160 |
else if (SIToFPInst *SCast = dyn_cast(CandidateUI->getUser())) { |
| 2161 |
IsSigned = true; |
2161 |
IsSigned = true; |
| 2162 |
DestTy = SCast->getDestTy(); |
2162 |
DestTy = SCast->getDestTy(); |
| 2163 |
} |
2163 |
} |
| 2164 |
if (!DestTy) continue; |
2164 |
if (!DestTy) continue; |
| 2165 |
|
2165 |
|
| 2166 |
// If target does not support DestTy natively then do not apply |
2166 |
// If target does not support DestTy natively then do not apply |
| 2167 |
// this transformation. |
2167 |
// this transformation. |
| 2168 |
if (!TTI.isTypeLegal(DestTy)) continue; |
2168 |
if (!TTI.isTypeLegal(DestTy)) continue; |
| 2169 |
|
2169 |
|
| 2170 |
PHINode *PH = dyn_cast(ShadowUse->getOperand(0)); |
2170 |
PHINode *PH = dyn_cast(ShadowUse->getOperand(0)); |
| 2171 |
if (!PH) continue; |
2171 |
if (!PH) continue; |
| 2172 |
if (PH->getNumIncomingValues() != 2) continue; |
2172 |
if (PH->getNumIncomingValues() != 2) continue; |
| 2173 |
|
2173 |
|
| 2174 |
// If the calculation in integers overflows, the result in FP type will |
2174 |
// If the calculation in integers overflows, the result in FP type will |
| 2175 |
// differ. So we only can do this transformation if we are guaranteed to not |
2175 |
// differ. So we only can do this transformation if we are guaranteed to not |
| 2176 |
// deal with overflowing values |
2176 |
// deal with overflowing values |
| 2177 |
const SCEVAddRecExpr *AR = dyn_cast(SE.getSCEV(PH)); |
2177 |
const SCEVAddRecExpr *AR = dyn_cast(SE.getSCEV(PH)); |
| 2178 |
if (!AR) continue; |
2178 |
if (!AR) continue; |
| 2179 |
if (IsSigned && !AR->hasNoSignedWrap()) continue; |
2179 |
if (IsSigned && !AR->hasNoSignedWrap()) continue; |
| 2180 |
if (!IsSigned && !AR->hasNoUnsignedWrap()) continue; |
2180 |
if (!IsSigned && !AR->hasNoUnsignedWrap()) continue; |
| 2181 |
|
2181 |
|
| 2182 |
Type *SrcTy = PH->getType(); |
2182 |
Type *SrcTy = PH->getType(); |
| 2183 |
int Mantissa = DestTy->getFPMantissaWidth(); |
2183 |
int Mantissa = DestTy->getFPMantissaWidth(); |
| 2184 |
if (Mantissa == -1) continue; |
2184 |
if (Mantissa == -1) continue; |
| 2185 |
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) |
2185 |
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) |
| 2186 |
continue; |
2186 |
continue; |
| 2187 |
|
2187 |
|
| 2188 |
unsigned Entry, Latch; |
2188 |
unsigned Entry, Latch; |
| 2189 |
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { |
2189 |
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { |
| 2190 |
Entry = 0; |
2190 |
Entry = 0; |
| 2191 |
Latch = 1; |
2191 |
Latch = 1; |
| 2192 |
} else { |
2192 |
} else { |
| 2193 |
Entry = 1; |
2193 |
Entry = 1; |
| 2194 |
Latch = 0; |
2194 |
Latch = 0; |
| 2195 |
} |
2195 |
} |
| 2196 |
|
2196 |
|
| 2197 |
ConstantInt *Init = dyn_cast(PH->getIncomingValue(Entry)); |
2197 |
ConstantInt *Init = dyn_cast(PH->getIncomingValue(Entry)); |
| 2198 |
if (!Init) continue; |
2198 |
if (!Init) continue; |
| 2199 |
Constant *NewInit = ConstantFP::get(DestTy, IsSigned ? |
2199 |
Constant *NewInit = ConstantFP::get(DestTy, IsSigned ? |
| 2200 |
(double)Init->getSExtValue() : |
2200 |
(double)Init->getSExtValue() : |
| 2201 |
(double)Init->getZExtValue()); |
2201 |
(double)Init->getZExtValue()); |
| 2202 |
|
2202 |
|
| 2203 |
BinaryOperator *Incr = |
2203 |
BinaryOperator *Incr = |
| 2204 |
dyn_cast(PH->getIncomingValue(Latch)); |
2204 |
dyn_cast(PH->getIncomingValue(Latch)); |
| 2205 |
if (!Incr) continue; |
2205 |
if (!Incr) continue; |
| 2206 |
if (Incr->getOpcode() != Instruction::Add |
2206 |
if (Incr->getOpcode() != Instruction::Add |
| 2207 |
&& Incr->getOpcode() != Instruction::Sub) |
2207 |
&& Incr->getOpcode() != Instruction::Sub) |
| 2208 |
continue; |
2208 |
continue; |
| 2209 |
|
2209 |
|
| 2210 |
/* Initialize new IV, double d = 0.0 in above example. */ |
2210 |
/* Initialize new IV, double d = 0.0 in above example. */ |
| 2211 |
ConstantInt *C = nullptr; |
2211 |
ConstantInt *C = nullptr; |
| 2212 |
if (Incr->getOperand(0) == PH) |
2212 |
if (Incr->getOperand(0) == PH) |
| 2213 |
C = dyn_cast(Incr->getOperand(1)); |
2213 |
C = dyn_cast(Incr->getOperand(1)); |
| 2214 |
else if (Incr->getOperand(1) == PH) |
2214 |
else if (Incr->getOperand(1) == PH) |
| 2215 |
C = dyn_cast(Incr->getOperand(0)); |
2215 |
C = dyn_cast(Incr->getOperand(0)); |
| 2216 |
else |
2216 |
else |
| 2217 |
continue; |
2217 |
continue; |
| 2218 |
|
2218 |
|
| 2219 |
if (!C) continue; |
2219 |
if (!C) continue; |
| 2220 |
|
2220 |
|
| 2221 |
// Ignore negative constants, as the code below doesn't handle them |
2221 |
// Ignore negative constants, as the code below doesn't handle them |
| 2222 |
// correctly. TODO: Remove this restriction. |
2222 |
// correctly. TODO: Remove this restriction. |
| 2223 |
if (!C->getValue().isStrictlyPositive()) continue; |
2223 |
if (!C->getValue().isStrictlyPositive()) continue; |
| 2224 |
|
2224 |
|
| 2225 |
/* Add new PHINode. */ |
2225 |
/* Add new PHINode. */ |
| 2226 |
PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); |
2226 |
PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); |
| 2227 |
|
2227 |
|
| 2228 |
/* create new increment. '++d' in above example. */ |
2228 |
/* create new increment. '++d' in above example. */ |
| 2229 |
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); |
2229 |
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); |
| 2230 |
BinaryOperator *NewIncr = |
2230 |
BinaryOperator *NewIncr = |
| 2231 |
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? |
2231 |
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? |
| 2232 |
Instruction::FAdd : Instruction::FSub, |
2232 |
Instruction::FAdd : Instruction::FSub, |
| 2233 |
NewPH, CFP, "IV.S.next.", Incr); |
2233 |
NewPH, CFP, "IV.S.next.", Incr); |
| 2234 |
|
2234 |
|
| 2235 |
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); |
2235 |
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); |
| 2236 |
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); |
2236 |
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); |
| 2237 |
|
2237 |
|
| 2238 |
/* Remove cast operation */ |
2238 |
/* Remove cast operation */ |
| 2239 |
ShadowUse->replaceAllUsesWith(NewPH); |
2239 |
ShadowUse->replaceAllUsesWith(NewPH); |
| 2240 |
ShadowUse->eraseFromParent(); |
2240 |
ShadowUse->eraseFromParent(); |
| 2241 |
Changed = true; |
2241 |
Changed = true; |
| 2242 |
break; |
2242 |
break; |
| 2243 |
} |
2243 |
} |
| 2244 |
} |
2244 |
} |
| 2245 |
|
2245 |
|
| 2246 |
/// If Cond has an operand that is an expression of an IV, set the IV user and |
2246 |
/// If Cond has an operand that is an expression of an IV, set the IV user and |
| 2247 |
/// stride information and return true, otherwise return false. |
2247 |
/// stride information and return true, otherwise return false. |
| 2248 |
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { |
2248 |
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { |
| 2249 |
for (IVStrideUse &U : IU) |
2249 |
for (IVStrideUse &U : IU) |
| 2250 |
if (U.getUser() == Cond) { |
2250 |
if (U.getUser() == Cond) { |
| 2251 |
// NOTE: we could handle setcc instructions with multiple uses here, but |
2251 |
// NOTE: we could handle setcc instructions with multiple uses here, but |
| 2252 |
// InstCombine does it as well for simple uses, it's not clear that it |
2252 |
// InstCombine does it as well for simple uses, it's not clear that it |
| 2253 |
// occurs enough in real life to handle. |
2253 |
// occurs enough in real life to handle. |
| 2254 |
CondUse = &U; |
2254 |
CondUse = &U; |
| 2255 |
return true; |
2255 |
return true; |
| 2256 |
} |
2256 |
} |
| 2257 |
return false; |
2257 |
return false; |
| 2258 |
} |
2258 |
} |
| 2259 |
|
2259 |
|
| 2260 |
/// Rewrite the loop's terminating condition if it uses a max computation. |
2260 |
/// Rewrite the loop's terminating condition if it uses a max computation. |
| 2261 |
/// |
2261 |
/// |
| 2262 |
/// This is a narrow solution to a specific, but acute, problem. For loops |
2262 |
/// This is a narrow solution to a specific, but acute, problem. For loops |
| 2263 |
/// like this: |
2263 |
/// like this: |
| 2264 |
/// |
2264 |
/// |
| 2265 |
/// i = 0; |
2265 |
/// i = 0; |
| 2266 |
/// do { |
2266 |
/// do { |
| 2267 |
/// p[i] = 0.0; |
2267 |
/// p[i] = 0.0; |
| 2268 |
/// } while (++i < n); |
2268 |
/// } while (++i < n); |
| 2269 |
/// |
2269 |
/// |
| 2270 |
/// the trip count isn't just 'n', because 'n' might not be positive. And |
2270 |
/// the trip count isn't just 'n', because 'n' might not be positive. And |
| 2271 |
/// unfortunately this can come up even for loops where the user didn't use |
2271 |
/// unfortunately this can come up even for loops where the user didn't use |
| 2272 |
/// a C do-while loop. For example, seemingly well-behaved top-test loops |
2272 |
/// a C do-while loop. For example, seemingly well-behaved top-test loops |
| 2273 |
/// will commonly be lowered like this: |
2273 |
/// will commonly be lowered like this: |
| 2274 |
/// |
2274 |
/// |
| 2275 |
/// if (n > 0) { |
2275 |
/// if (n > 0) { |
| 2276 |
/// i = 0; |
2276 |
/// i = 0; |
| 2277 |
/// do { |
2277 |
/// do { |
| 2278 |
/// p[i] = 0.0; |
2278 |
/// p[i] = 0.0; |
| 2279 |
/// } while (++i < n); |
2279 |
/// } while (++i < n); |
| 2280 |
/// } |
2280 |
/// } |
| 2281 |
/// |
2281 |
/// |
| 2282 |
/// and then it's possible for subsequent optimization to obscure the if |
2282 |
/// and then it's possible for subsequent optimization to obscure the if |
| 2283 |
/// test in such a way that indvars can't find it. |
2283 |
/// test in such a way that indvars can't find it. |
| 2284 |
/// |
2284 |
/// |
| 2285 |
/// When indvars can't find the if test in loops like this, it creates a |
2285 |
/// When indvars can't find the if test in loops like this, it creates a |
| 2286 |
/// max expression, which allows it to give the loop a canonical |
2286 |
/// max expression, which allows it to give the loop a canonical |
| 2287 |
/// induction variable: |
2287 |
/// induction variable: |
| 2288 |
/// |
2288 |
/// |
| 2289 |
/// i = 0; |
2289 |
/// i = 0; |
| 2290 |
/// max = n < 1 ? 1 : n; |
2290 |
/// max = n < 1 ? 1 : n; |
| 2291 |
/// do { |
2291 |
/// do { |
| 2292 |
/// p[i] = 0.0; |
2292 |
/// p[i] = 0.0; |
| 2293 |
/// } while (++i != max); |
2293 |
/// } while (++i != max); |
| 2294 |
/// |
2294 |
/// |
| 2295 |
/// Canonical induction variables are necessary because the loop passes |
2295 |
/// Canonical induction variables are necessary because the loop passes |
| 2296 |
/// are designed around them. The most obvious example of this is the |
2296 |
/// are designed around them. The most obvious example of this is the |
| 2297 |
/// LoopInfo analysis, which doesn't remember trip count values. It |
2297 |
/// LoopInfo analysis, which doesn't remember trip count values. It |
| 2298 |
/// expects to be able to rediscover the trip count each time it is |
2298 |
/// expects to be able to rediscover the trip count each time it is |
| 2299 |
/// needed, and it does this using a simple analysis that only succeeds if |
2299 |
/// needed, and it does this using a simple analysis that only succeeds if |
| 2300 |
/// the loop has a canonical induction variable. |
2300 |
/// the loop has a canonical induction variable. |
| 2301 |
/// |
2301 |
/// |
| 2302 |
/// However, when it comes time to generate code, the maximum operation |
2302 |
/// However, when it comes time to generate code, the maximum operation |
| 2303 |
/// can be quite costly, especially if it's inside of an outer loop. |
2303 |
/// can be quite costly, especially if it's inside of an outer loop. |
| 2304 |
/// |
2304 |
/// |
| 2305 |
/// This function solves this problem by detecting this type of loop and |
2305 |
/// This function solves this problem by detecting this type of loop and |
| 2306 |
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting |
2306 |
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting |
| 2307 |
/// the instructions for the maximum computation. |
2307 |
/// the instructions for the maximum computation. |
| 2308 |
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { |
2308 |
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { |
| 2309 |
// Check that the loop matches the pattern we're looking for. |
2309 |
// Check that the loop matches the pattern we're looking for. |
| 2310 |
if (Cond->getPredicate() != CmpInst::ICMP_EQ && |
2310 |
if (Cond->getPredicate() != CmpInst::ICMP_EQ && |
| 2311 |
Cond->getPredicate() != CmpInst::ICMP_NE) |
2311 |
Cond->getPredicate() != CmpInst::ICMP_NE) |
| 2312 |
return Cond; |
2312 |
return Cond; |
| 2313 |
|
2313 |
|
| 2314 |
SelectInst *Sel = dyn_cast(Cond->getOperand(1)); |
2314 |
SelectInst *Sel = dyn_cast(Cond->getOperand(1)); |
| 2315 |
if (!Sel || !Sel->hasOneUse()) return Cond; |
2315 |
if (!Sel || !Sel->hasOneUse()) return Cond; |
| 2316 |
|
2316 |
|
| 2317 |
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); |
2317 |
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); |
| 2318 |
if (isa(BackedgeTakenCount)) |
2318 |
if (isa(BackedgeTakenCount)) |
| 2319 |
return Cond; |
2319 |
return Cond; |
| 2320 |
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); |
2320 |
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); |
| 2321 |
|
2321 |
|
| 2322 |
// Add one to the backedge-taken count to get the trip count. |
2322 |
// Add one to the backedge-taken count to get the trip count. |
| 2323 |
const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); |
2323 |
const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); |
| 2324 |
if (IterationCount != SE.getSCEV(Sel)) return Cond; |
2324 |
if (IterationCount != SE.getSCEV(Sel)) return Cond; |
| 2325 |
|
2325 |
|
| 2326 |
// Check for a max calculation that matches the pattern. There's no check |
2326 |
// Check for a max calculation that matches the pattern. There's no check |
| 2327 |
// for ICMP_ULE here because the comparison would be with zero, which |
2327 |
// for ICMP_ULE here because the comparison would be with zero, which |
| 2328 |
// isn't interesting. |
2328 |
// isn't interesting. |
| 2329 |
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; |
2329 |
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; |
| 2330 |
const SCEVNAryExpr *Max = nullptr; |
2330 |
const SCEVNAryExpr *Max = nullptr; |
| 2331 |
if (const SCEVSMaxExpr *S = dyn_cast(BackedgeTakenCount)) { |
2331 |
if (const SCEVSMaxExpr *S = dyn_cast(BackedgeTakenCount)) { |
| 2332 |
Pred = ICmpInst::ICMP_SLE; |
2332 |
Pred = ICmpInst::ICMP_SLE; |
| 2333 |
Max = S; |
2333 |
Max = S; |
| 2334 |
} else if (const SCEVSMaxExpr *S = dyn_cast(IterationCount)) { |
2334 |
} else if (const SCEVSMaxExpr *S = dyn_cast(IterationCount)) { |
| 2335 |
Pred = ICmpInst::ICMP_SLT; |
2335 |
Pred = ICmpInst::ICMP_SLT; |
| 2336 |
Max = S; |
2336 |
Max = S; |
| 2337 |
} else if (const SCEVUMaxExpr *U = dyn_cast(IterationCount)) { |
2337 |
} else if (const SCEVUMaxExpr *U = dyn_cast(IterationCount)) { |
| 2338 |
Pred = ICmpInst::ICMP_ULT; |
2338 |
Pred = ICmpInst::ICMP_ULT; |
| 2339 |
Max = U; |
2339 |
Max = U; |
| 2340 |
} else { |
2340 |
} else { |
| 2341 |
// No match; bail. |
2341 |
// No match; bail. |
| 2342 |
return Cond; |
2342 |
return Cond; |
| 2343 |
} |
2343 |
} |
| 2344 |
|
2344 |
|
| 2345 |
// To handle a max with more than two operands, this optimization would |
2345 |
// To handle a max with more than two operands, this optimization would |
| 2346 |
// require additional checking and setup. |
2346 |
// require additional checking and setup. |
| 2347 |
if (Max->getNumOperands() != 2) |
2347 |
if (Max->getNumOperands() != 2) |
| 2348 |
return Cond; |
2348 |
return Cond; |
| 2349 |
|
2349 |
|
| 2350 |
const SCEV *MaxLHS = Max->getOperand(0); |
2350 |
const SCEV *MaxLHS = Max->getOperand(0); |
| 2351 |
const SCEV *MaxRHS = Max->getOperand(1); |
2351 |
const SCEV *MaxRHS = Max->getOperand(1); |
| 2352 |
|
2352 |
|
| 2353 |
// ScalarEvolution canonicalizes constants to the left. For < and >, look |
2353 |
// ScalarEvolution canonicalizes constants to the left. For < and >, look |
| 2354 |
// for a comparison with 1. For <= and >=, a comparison with zero. |
2354 |
// for a comparison with 1. For <= and >=, a comparison with zero. |
| 2355 |
if (!MaxLHS || |
2355 |
if (!MaxLHS || |
| 2356 |
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) |
2356 |
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) |
| 2357 |
return Cond; |
2357 |
return Cond; |
| 2358 |
|
2358 |
|
| 2359 |
// Check the relevant induction variable for conformance to |
2359 |
// Check the relevant induction variable for conformance to |
| 2360 |
// the pattern. |
2360 |
// the pattern. |
| 2361 |
const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); |
2361 |
const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); |
| 2362 |
const SCEVAddRecExpr *AR = dyn_cast(IV); |
2362 |
const SCEVAddRecExpr *AR = dyn_cast(IV); |
| 2363 |
if (!AR || !AR->isAffine() || |
2363 |
if (!AR || !AR->isAffine() || |
| 2364 |
AR->getStart() != One || |
2364 |
AR->getStart() != One || |
| 2365 |
AR->getStepRecurrence(SE) != One) |
2365 |
AR->getStepRecurrence(SE) != One) |
| 2366 |
return Cond; |
2366 |
return Cond; |
| 2367 |
|
2367 |
|
| 2368 |
assert(AR->getLoop() == L && |
2368 |
assert(AR->getLoop() == L && |
| 2369 |
"Loop condition operand is an addrec in a different loop!"); |
2369 |
"Loop condition operand is an addrec in a different loop!"); |
| 2370 |
|
2370 |
|
| 2371 |
// Check the right operand of the select, and remember it, as it will |
2371 |
// Check the right operand of the select, and remember it, as it will |
| 2372 |
// be used in the new comparison instruction. |
2372 |
// be used in the new comparison instruction. |
| 2373 |
Value *NewRHS = nullptr; |
2373 |
Value *NewRHS = nullptr; |
| 2374 |
if (ICmpInst::isTrueWhenEqual(Pred)) { |
2374 |
if (ICmpInst::isTrueWhenEqual(Pred)) { |
| 2375 |
// Look for n+1, and grab n. |
2375 |
// Look for n+1, and grab n. |
| 2376 |
if (AddOperator *BO = dyn_cast(Sel->getOperand(1))) |
2376 |
if (AddOperator *BO = dyn_cast(Sel->getOperand(1))) |
| 2377 |
if (ConstantInt *BO1 = dyn_cast(BO->getOperand(1))) |
2377 |
if (ConstantInt *BO1 = dyn_cast(BO->getOperand(1))) |
| 2378 |
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) |
2378 |
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) |
| 2379 |
NewRHS = BO->getOperand(0); |
2379 |
NewRHS = BO->getOperand(0); |
| 2380 |
if (AddOperator *BO = dyn_cast(Sel->getOperand(2))) |
2380 |
if (AddOperator *BO = dyn_cast(Sel->getOperand(2))) |
| 2381 |
if (ConstantInt *BO1 = dyn_cast(BO->getOperand(1))) |
2381 |
if (ConstantInt *BO1 = dyn_cast(BO->getOperand(1))) |
| 2382 |
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) |
2382 |
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) |
| 2383 |
NewRHS = BO->getOperand(0); |
2383 |
NewRHS = BO->getOperand(0); |
| 2384 |
if (!NewRHS) |
2384 |
if (!NewRHS) |
| 2385 |
return Cond; |
2385 |
return Cond; |
| 2386 |
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) |
2386 |
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) |
| 2387 |
NewRHS = Sel->getOperand(1); |
2387 |
NewRHS = Sel->getOperand(1); |
| 2388 |
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) |
2388 |
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) |
| 2389 |
NewRHS = Sel->getOperand(2); |
2389 |
NewRHS = Sel->getOperand(2); |
| 2390 |
else if (const SCEVUnknown *SU = dyn_cast(MaxRHS)) |
2390 |
else if (const SCEVUnknown *SU = dyn_cast(MaxRHS)) |
| 2391 |
NewRHS = SU->getValue(); |
2391 |
NewRHS = SU->getValue(); |
| 2392 |
else |
2392 |
else |
| 2393 |
// Max doesn't match expected pattern. |
2393 |
// Max doesn't match expected pattern. |
| 2394 |
return Cond; |
2394 |
return Cond; |
| 2395 |
|
2395 |
|
| 2396 |
// Determine the new comparison opcode. It may be signed or unsigned, |
2396 |
// Determine the new comparison opcode. It may be signed or unsigned, |
| 2397 |
// and the original comparison may be either equality or inequality. |
2397 |
// and the original comparison may be either equality or inequality. |
| 2398 |
if (Cond->getPredicate() == CmpInst::ICMP_EQ) |
2398 |
if (Cond->getPredicate() == CmpInst::ICMP_EQ) |
| 2399 |
Pred = CmpInst::getInversePredicate(Pred); |
2399 |
Pred = CmpInst::getInversePredicate(Pred); |
| 2400 |
|
2400 |
|
| 2401 |
// Ok, everything looks ok to change the condition into an SLT or SGE and |
2401 |
// Ok, everything looks ok to change the condition into an SLT or SGE and |
| 2402 |
// delete the max calculation. |
2402 |
// delete the max calculation. |
| 2403 |
ICmpInst *NewCond = |
2403 |
ICmpInst *NewCond = |
| 2404 |
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp"); |
2404 |
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp"); |
| 2405 |
|
2405 |
|
| 2406 |
// Delete the max calculation instructions. |
2406 |
// Delete the max calculation instructions. |
| 2407 |
NewCond->setDebugLoc(Cond->getDebugLoc()); |
2407 |
NewCond->setDebugLoc(Cond->getDebugLoc()); |
| 2408 |
Cond->replaceAllUsesWith(NewCond); |
2408 |
Cond->replaceAllUsesWith(NewCond); |
| 2409 |
CondUse->setUser(NewCond); |
2409 |
CondUse->setUser(NewCond); |
| 2410 |
Instruction *Cmp = cast(Sel->getOperand(0)); |
2410 |
Instruction *Cmp = cast(Sel->getOperand(0)); |
| 2411 |
Cond->eraseFromParent(); |
2411 |
Cond->eraseFromParent(); |
| 2412 |
Sel->eraseFromParent(); |
2412 |
Sel->eraseFromParent(); |
| 2413 |
if (Cmp->use_empty()) |
2413 |
if (Cmp->use_empty()) |
| 2414 |
Cmp->eraseFromParent(); |
2414 |
Cmp->eraseFromParent(); |
| 2415 |
return NewCond; |
2415 |
return NewCond; |
| 2416 |
} |
2416 |
} |
| 2417 |
|
2417 |
|
| 2418 |
/// Change loop terminating condition to use the postinc iv when possible. |
2418 |
/// Change loop terminating condition to use the postinc iv when possible. |
| 2419 |
void |
2419 |
void |
| 2420 |
LSRInstance::OptimizeLoopTermCond() { |
2420 |
LSRInstance::OptimizeLoopTermCond() { |
| 2421 |
SmallPtrSet PostIncs; |
2421 |
SmallPtrSet PostIncs; |
| 2422 |
|
2422 |
|
| 2423 |
// We need a different set of heuristics for rotated and non-rotated loops. |
2423 |
// We need a different set of heuristics for rotated and non-rotated loops. |
| 2424 |
// If a loop is rotated then the latch is also the backedge, so inserting |
2424 |
// If a loop is rotated then the latch is also the backedge, so inserting |
| 2425 |
// post-inc expressions just before the latch is ideal. To reduce live ranges |
2425 |
// post-inc expressions just before the latch is ideal. To reduce live ranges |
| 2426 |
// it also makes sense to rewrite terminating conditions to use post-inc |
2426 |
// it also makes sense to rewrite terminating conditions to use post-inc |
| 2427 |
// expressions. |
2427 |
// expressions. |
| 2428 |
// |
2428 |
// |
| 2429 |
// If the loop is not rotated then the latch is not a backedge; the latch |
2429 |
// If the loop is not rotated then the latch is not a backedge; the latch |
| 2430 |
// check is done in the loop head. Adding post-inc expressions before the |
2430 |
// check is done in the loop head. Adding post-inc expressions before the |
| 2431 |
// latch will cause overlapping live-ranges of pre-inc and post-inc expressions |
2431 |
// latch will cause overlapping live-ranges of pre-inc and post-inc expressions |
| 2432 |
// in the loop body. In this case we do *not* want to use post-inc expressions |
2432 |
// in the loop body. In this case we do *not* want to use post-inc expressions |
| 2433 |
// in the latch check, and we want to insert post-inc expressions before |
2433 |
// in the latch check, and we want to insert post-inc expressions before |
| 2434 |
// the backedge. |
2434 |
// the backedge. |
| 2435 |
BasicBlock *LatchBlock = L->getLoopLatch(); |
2435 |
BasicBlock *LatchBlock = L->getLoopLatch(); |
| 2436 |
SmallVector ExitingBlocks; |
2436 |
SmallVector ExitingBlocks; |
| 2437 |
L->getExitingBlocks(ExitingBlocks); |
2437 |
L->getExitingBlocks(ExitingBlocks); |
| 2438 |
if (!llvm::is_contained(ExitingBlocks, LatchBlock)) { |
2438 |
if (!llvm::is_contained(ExitingBlocks, LatchBlock)) { |
| 2439 |
// The backedge doesn't exit the loop; treat this as a head-tested loop. |
2439 |
// The backedge doesn't exit the loop; treat this as a head-tested loop. |
| 2440 |
IVIncInsertPos = LatchBlock->getTerminator(); |
2440 |
IVIncInsertPos = LatchBlock->getTerminator(); |
| 2441 |
return; |
2441 |
return; |
| 2442 |
} |
2442 |
} |
| 2443 |
|
2443 |
|
| 2444 |
// Otherwise treat this as a rotated loop. |
2444 |
// Otherwise treat this as a rotated loop. |
| 2445 |
for (BasicBlock *ExitingBlock : ExitingBlocks) { |
2445 |
for (BasicBlock *ExitingBlock : ExitingBlocks) { |
| 2446 |
// Get the terminating condition for the loop if possible. If we |
2446 |
// Get the terminating condition for the loop if possible. If we |
| 2447 |
// can, we want to change it to use a post-incremented version of its |
2447 |
// can, we want to change it to use a post-incremented version of its |
| 2448 |
// induction variable, to allow coalescing the live ranges for the IV into |
2448 |
// induction variable, to allow coalescing the live ranges for the IV into |
| 2449 |
// one register value. |
2449 |
// one register value. |
| 2450 |
|
2450 |
|
| 2451 |
BranchInst *TermBr = dyn_cast(ExitingBlock->getTerminator()); |
2451 |
BranchInst *TermBr = dyn_cast(ExitingBlock->getTerminator()); |
| 2452 |
if (!TermBr) |
2452 |
if (!TermBr) |
| 2453 |
continue; |
2453 |
continue; |
| 2454 |
// FIXME: Overly conservative, termination condition could be an 'or' etc.. |
2454 |
// FIXME: Overly conservative, termination condition could be an 'or' etc.. |
| 2455 |
if (TermBr->isUnconditional() || !isa(TermBr->getCondition())) |
2455 |
if (TermBr->isUnconditional() || !isa(TermBr->getCondition())) |
| 2456 |
continue; |
2456 |
continue; |
| 2457 |
|
2457 |
|
| 2458 |
// Search IVUsesByStride to find Cond's IVUse if there is one. |
2458 |
// Search IVUsesByStride to find Cond's IVUse if there is one. |
| 2459 |
IVStrideUse *CondUse = nullptr; |
2459 |
IVStrideUse *CondUse = nullptr; |
| 2460 |
ICmpInst *Cond = cast(TermBr->getCondition()); |
2460 |
ICmpInst *Cond = cast(TermBr->getCondition()); |
| 2461 |
if (!FindIVUserForCond(Cond, CondUse)) |
2461 |
if (!FindIVUserForCond(Cond, CondUse)) |
| 2462 |
continue; |
2462 |
continue; |
| 2463 |
|
2463 |
|
| 2464 |
// If the trip count is computed in terms of a max (due to ScalarEvolution |
2464 |
// If the trip count is computed in terms of a max (due to ScalarEvolution |
| 2465 |
// being unable to find a sufficient guard, for example), change the loop |
2465 |
// being unable to find a sufficient guard, for example), change the loop |
| 2466 |
// comparison to use SLT or ULT instead of NE. |
2466 |
// comparison to use SLT or ULT instead of NE. |
| 2467 |
// One consequence of doing this now is that it disrupts the count-down |
2467 |
// One consequence of doing this now is that it disrupts the count-down |
| 2468 |
// optimization. That's not always a bad thing though, because in such |
2468 |
// optimization. That's not always a bad thing though, because in such |
| 2469 |
// cases it may still be worthwhile to avoid a max. |
2469 |
// cases it may still be worthwhile to avoid a max. |
| 2470 |
Cond = OptimizeMax(Cond, CondUse); |
2470 |
Cond = OptimizeMax(Cond, CondUse); |
| 2471 |
|
2471 |
|
| 2472 |
// If this exiting block dominates the latch block, it may also use |
2472 |
// If this exiting block dominates the latch block, it may also use |
| 2473 |
// the post-inc value if it won't be shared with other uses. |
2473 |
// the post-inc value if it won't be shared with other uses. |
| 2474 |
// Check for dominance. |
2474 |
// Check for dominance. |
| 2475 |
if (!DT.dominates(ExitingBlock, LatchBlock)) |
2475 |
if (!DT.dominates(ExitingBlock, LatchBlock)) |
| 2476 |
continue; |
2476 |
continue; |
| 2477 |
|
2477 |
|
| 2478 |
// Conservatively avoid trying to use the post-inc value in non-latch |
2478 |
// Conservatively avoid trying to use the post-inc value in non-latch |
| 2479 |
// exits if there may be pre-inc users in intervening blocks. |
2479 |
// exits if there may be pre-inc users in intervening blocks. |
| 2480 |
if (LatchBlock != ExitingBlock) |
2480 |
if (LatchBlock != ExitingBlock) |
| 2481 |
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) |
2481 |
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) |
| 2482 |
// Test if the use is reachable from the exiting block. This dominator |
2482 |
// Test if the use is reachable from the exiting block. This dominator |
| 2483 |
// query is a conservative approximation of reachability. |
2483 |
// query is a conservative approximation of reachability. |
| 2484 |
if (&*UI != CondUse && |
2484 |
if (&*UI != CondUse && |
| 2485 |
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { |
2485 |
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { |
| 2486 |
// Conservatively assume there may be reuse if the quotient of their |
2486 |
// Conservatively assume there may be reuse if the quotient of their |
| 2487 |
// strides could be a legal scale. |
2487 |
// strides could be a legal scale. |
| 2488 |
const SCEV *A = IU.getStride(*CondUse, L); |
2488 |
const SCEV *A = IU.getStride(*CondUse, L); |
| 2489 |
const SCEV *B = IU.getStride(*UI, L); |
2489 |
const SCEV *B = IU.getStride(*UI, L); |
| 2490 |
if (!A || !B) continue; |
2490 |
if (!A || !B) continue; |
| 2491 |
if (SE.getTypeSizeInBits(A->getType()) != |
2491 |
if (SE.getTypeSizeInBits(A->getType()) != |
| 2492 |
SE.getTypeSizeInBits(B->getType())) { |
2492 |
SE.getTypeSizeInBits(B->getType())) { |
| 2493 |
if (SE.getTypeSizeInBits(A->getType()) > |
2493 |
if (SE.getTypeSizeInBits(A->getType()) > |
| 2494 |
SE.getTypeSizeInBits(B->getType())) |
2494 |
SE.getTypeSizeInBits(B->getType())) |
| 2495 |
B = SE.getSignExtendExpr(B, A->getType()); |
2495 |
B = SE.getSignExtendExpr(B, A->getType()); |
| 2496 |
else |
2496 |
else |
| 2497 |
A = SE.getSignExtendExpr(A, B->getType()); |
2497 |
A = SE.getSignExtendExpr(A, B->getType()); |
| 2498 |
} |
2498 |
} |
| 2499 |
if (const SCEVConstant *D = |
2499 |
if (const SCEVConstant *D = |
| 2500 |
dyn_cast_or_null(getExactSDiv(B, A, SE))) { |
2500 |
dyn_cast_or_null(getExactSDiv(B, A, SE))) { |
| 2501 |
const ConstantInt *C = D->getValue(); |
2501 |
const ConstantInt *C = D->getValue(); |
| 2502 |
// Stride of one or negative one can have reuse with non-addresses. |
2502 |
// Stride of one or negative one can have reuse with non-addresses. |
| 2503 |
if (C->isOne() || C->isMinusOne()) |
2503 |
if (C->isOne() || C->isMinusOne()) |
| 2504 |
goto decline_post_inc; |
2504 |
goto decline_post_inc; |
| 2505 |
// Avoid weird situations. |
2505 |
// Avoid weird situations. |
| 2506 |
if (C->getValue().getSignificantBits() >= 64 || |
2506 |
if (C->getValue().getSignificantBits() >= 64 || |
| 2507 |
C->getValue().isMinSignedValue()) |
2507 |
C->getValue().isMinSignedValue()) |
| 2508 |
goto decline_post_inc; |
2508 |
goto decline_post_inc; |
| 2509 |
// Check for possible scaled-address reuse. |
2509 |
// Check for possible scaled-address reuse. |
| 2510 |
if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) { |
2510 |
if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) { |
| 2511 |
MemAccessTy AccessTy = getAccessType( |
2511 |
MemAccessTy AccessTy = getAccessType( |
| 2512 |
TTI, UI->getUser(), UI->getOperandValToReplace()); |
2512 |
TTI, UI->getUser(), UI->getOperandValToReplace()); |
| 2513 |
int64_t Scale = C->getSExtValue(); |
2513 |
int64_t Scale = C->getSExtValue(); |
| 2514 |
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, |
2514 |
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, |
| 2515 |
/*BaseOffset=*/0, |
2515 |
/*BaseOffset=*/0, |
| 2516 |
/*HasBaseReg=*/true, Scale, |
2516 |
/*HasBaseReg=*/true, Scale, |
| 2517 |
AccessTy.AddrSpace)) |
2517 |
AccessTy.AddrSpace)) |
| 2518 |
goto decline_post_inc; |
2518 |
goto decline_post_inc; |
| 2519 |
Scale = -Scale; |
2519 |
Scale = -Scale; |
| 2520 |
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, |
2520 |
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, |
| 2521 |
/*BaseOffset=*/0, |
2521 |
/*BaseOffset=*/0, |
| 2522 |
/*HasBaseReg=*/true, Scale, |
2522 |
/*HasBaseReg=*/true, Scale, |
| 2523 |
AccessTy.AddrSpace)) |
2523 |
AccessTy.AddrSpace)) |
| 2524 |
goto decline_post_inc; |
2524 |
goto decline_post_inc; |
| 2525 |
} |
2525 |
} |
| 2526 |
} |
2526 |
} |
| 2527 |
} |
2527 |
} |
| 2528 |
|
2528 |
|
| 2529 |
LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " |
2529 |
LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " |
| 2530 |
<< *Cond << '\n'); |
2530 |
<< *Cond << '\n'); |
| 2531 |
|
2531 |
|
| 2532 |
// It's possible for the setcc instruction to be anywhere in the loop, and |
2532 |
// It's possible for the setcc instruction to be anywhere in the loop, and |
| 2533 |
// possible for it to have multiple users. If it is not immediately before |
2533 |
// possible for it to have multiple users. If it is not immediately before |
| 2534 |
// the exiting block branch, move it. |
2534 |
// the exiting block branch, move it. |
| 2535 |
if (Cond->getNextNonDebugInstruction() != TermBr) { |
2535 |
if (Cond->getNextNonDebugInstruction() != TermBr) { |
| 2536 |
if (Cond->hasOneUse()) { |
2536 |
if (Cond->hasOneUse()) { |
| 2537 |
Cond->moveBefore(TermBr); |
2537 |
Cond->moveBefore(TermBr); |
| 2538 |
} else { |
2538 |
} else { |
| 2539 |
// Clone the terminating condition and insert into the loopend. |
2539 |
// Clone the terminating condition and insert into the loopend. |
| 2540 |
ICmpInst *OldCond = Cond; |
2540 |
ICmpInst *OldCond = Cond; |
| 2541 |
Cond = cast(Cond->clone()); |
2541 |
Cond = cast(Cond->clone()); |
| 2542 |
Cond->setName(L->getHeader()->getName() + ".termcond"); |
2542 |
Cond->setName(L->getHeader()->getName() + ".termcond"); |
| 2543 |
Cond->insertInto(ExitingBlock, TermBr->getIterator()); |
2543 |
Cond->insertInto(ExitingBlock, TermBr->getIterator()); |
| 2544 |
|
2544 |
|
| 2545 |
// Clone the IVUse, as the old use still exists! |
2545 |
// Clone the IVUse, as the old use still exists! |
| 2546 |
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); |
2546 |
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); |
| 2547 |
TermBr->replaceUsesOfWith(OldCond, Cond); |
2547 |
TermBr->replaceUsesOfWith(OldCond, Cond); |
| 2548 |
} |
2548 |
} |
| 2549 |
} |
2549 |
} |
| 2550 |
|
2550 |
|
| 2551 |
// If we get to here, we know that we can transform the setcc instruction to |
2551 |
// If we get to here, we know that we can transform the setcc instruction to |
| 2552 |
// use the post-incremented version of the IV, allowing us to coalesce the |
2552 |
// use the post-incremented version of the IV, allowing us to coalesce the |
| 2553 |
// live ranges for the IV correctly. |
2553 |
// live ranges for the IV correctly. |
| 2554 |
CondUse->transformToPostInc(L); |
2554 |
CondUse->transformToPostInc(L); |
| 2555 |
Changed = true; |
2555 |
Changed = true; |
| 2556 |
|
2556 |
|
| 2557 |
PostIncs.insert(Cond); |
2557 |
PostIncs.insert(Cond); |
| 2558 |
decline_post_inc:; |
2558 |
decline_post_inc:; |
| 2559 |
} |
2559 |
} |
| 2560 |
|
2560 |
|
| 2561 |
// Determine an insertion point for the loop induction variable increment. It |
2561 |
// Determine an insertion point for the loop induction variable increment. It |
| 2562 |
// must dominate all the post-inc comparisons we just set up, and it must |
2562 |
// must dominate all the post-inc comparisons we just set up, and it must |
| 2563 |
// dominate the loop latch edge. |
2563 |
// dominate the loop latch edge. |
| 2564 |
IVIncInsertPos = L->getLoopLatch()->getTerminator(); |
2564 |
IVIncInsertPos = L->getLoopLatch()->getTerminator(); |
| 2565 |
for (Instruction *Inst : PostIncs) |
2565 |
for (Instruction *Inst : PostIncs) |
| 2566 |
IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst); |
2566 |
IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst); |
| 2567 |
} |
2567 |
} |
| 2568 |
|
2568 |
|
| 2569 |
/// Determine if the given use can accommodate a fixup at the given offset and |
2569 |
/// Determine if the given use can accommodate a fixup at the given offset and |
| 2570 |
/// other details. If so, update the use and return true. |
2570 |
/// other details. If so, update the use and return true. |
| 2571 |
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, |
2571 |
bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, |
| 2572 |
bool HasBaseReg, LSRUse::KindType Kind, |
2572 |
bool HasBaseReg, LSRUse::KindType Kind, |
| 2573 |
MemAccessTy AccessTy) { |
2573 |
MemAccessTy AccessTy) { |
| 2574 |
int64_t NewMinOffset = LU.MinOffset; |
2574 |
int64_t NewMinOffset = LU.MinOffset; |
| 2575 |
int64_t NewMaxOffset = LU.MaxOffset; |
2575 |
int64_t NewMaxOffset = LU.MaxOffset; |
| 2576 |
MemAccessTy NewAccessTy = AccessTy; |
2576 |
MemAccessTy NewAccessTy = AccessTy; |
| 2577 |
|
2577 |
|
| 2578 |
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to |
2578 |
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to |
| 2579 |
// something conservative, however this can pessimize in the case that one of |
2579 |
// something conservative, however this can pessimize in the case that one of |
| 2580 |
// the uses will have all its uses outside the loop, for example. |
2580 |
// the uses will have all its uses outside the loop, for example. |
| 2581 |
if (LU.Kind != Kind) |
2581 |
if (LU.Kind != Kind) |
| 2582 |
return false; |
2582 |
return false; |
| 2583 |
|
2583 |
|
| 2584 |
// Check for a mismatched access type, and fall back conservatively as needed. |
2584 |
// Check for a mismatched access type, and fall back conservatively as needed. |
| 2585 |
// TODO: Be less conservative when the type is similar and can use the same |
2585 |
// TODO: Be less conservative when the type is similar and can use the same |
| 2586 |
// addressing modes. |
2586 |
// addressing modes. |
| 2587 |
if (Kind == LSRUse::Address) { |
2587 |
if (Kind == LSRUse::Address) { |
| 2588 |
if (AccessTy.MemTy != LU.AccessTy.MemTy) { |
2588 |
if (AccessTy.MemTy != LU.AccessTy.MemTy) { |
| 2589 |
NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(), |
2589 |
NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(), |
| 2590 |
AccessTy.AddrSpace); |
2590 |
AccessTy.AddrSpace); |
| 2591 |
} |
2591 |
} |
| 2592 |
} |
2592 |
} |
| 2593 |
|
2593 |
|
| 2594 |
// Conservatively assume HasBaseReg is true for now. |
2594 |
// Conservatively assume HasBaseReg is true for now. |
| 2595 |
if (NewOffset < LU.MinOffset) { |
2595 |
if (NewOffset < LU.MinOffset) { |
| 2596 |
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, |
2596 |
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, |
| 2597 |
LU.MaxOffset - NewOffset, HasBaseReg)) |
2597 |
LU.MaxOffset - NewOffset, HasBaseReg)) |
| 2598 |
return false; |
2598 |
return false; |
| 2599 |
NewMinOffset = NewOffset; |
2599 |
NewMinOffset = NewOffset; |
| 2600 |
} else if (NewOffset > LU.MaxOffset) { |
2600 |
} else if (NewOffset > LU.MaxOffset) { |
| 2601 |
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, |
2601 |
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, |
| 2602 |
NewOffset - LU.MinOffset, HasBaseReg)) |
2602 |
NewOffset - LU.MinOffset, HasBaseReg)) |
| 2603 |
return false; |
2603 |
return false; |
| 2604 |
NewMaxOffset = NewOffset; |
2604 |
NewMaxOffset = NewOffset; |
| 2605 |
} |
2605 |
} |
| 2606 |
|
2606 |
|
| 2607 |
// Update the use. |
2607 |
// Update the use. |
| 2608 |
LU.MinOffset = NewMinOffset; |
2608 |
LU.MinOffset = NewMinOffset; |
| 2609 |
LU.MaxOffset = NewMaxOffset; |
2609 |
LU.MaxOffset = NewMaxOffset; |
| 2610 |
LU.AccessTy = NewAccessTy; |
2610 |
LU.AccessTy = NewAccessTy; |
| 2611 |
return true; |
2611 |
return true; |
| 2612 |
} |
2612 |
} |
| 2613 |
|
2613 |
|
| 2614 |
/// Return an LSRUse index and an offset value for a fixup which needs the given |
2614 |
/// Return an LSRUse index and an offset value for a fixup which needs the given |
| 2615 |
/// expression, with the given kind and optional access type. Either reuse an |
2615 |
/// expression, with the given kind and optional access type. Either reuse an |
| 2616 |
/// existing use or create a new one, as needed. |
2616 |
/// existing use or create a new one, as needed. |
| 2617 |
std::pair LSRInstance::getUse(const SCEV *&Expr, |
2617 |
std::pair LSRInstance::getUse(const SCEV *&Expr, |
| 2618 |
LSRUse::KindType Kind, |
2618 |
LSRUse::KindType Kind, |
| 2619 |
MemAccessTy AccessTy) { |
2619 |
MemAccessTy AccessTy) { |
| 2620 |
const SCEV *Copy = Expr; |
2620 |
const SCEV *Copy = Expr; |
| 2621 |
int64_t Offset = ExtractImmediate(Expr, SE); |
2621 |
int64_t Offset = ExtractImmediate(Expr, SE); |
| 2622 |
|
2622 |
|
| 2623 |
// Basic uses can't accept any offset, for example. |
2623 |
// Basic uses can't accept any offset, for example. |
| 2624 |
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, |
2624 |
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, |
| 2625 |
Offset, /*HasBaseReg=*/ true)) { |
2625 |
Offset, /*HasBaseReg=*/ true)) { |
| 2626 |
Expr = Copy; |
2626 |
Expr = Copy; |
| 2627 |
Offset = 0; |
2627 |
Offset = 0; |
| 2628 |
} |
2628 |
} |
| 2629 |
|
2629 |
|
| 2630 |
std::pair P = |
2630 |
std::pair P = |
| 2631 |
UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0)); |
2631 |
UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0)); |
| 2632 |
if (!P.second) { |
2632 |
if (!P.second) { |
| 2633 |
// A use already existed with this base. |
2633 |
// A use already existed with this base. |
| 2634 |
size_t LUIdx = P.first->second; |
2634 |
size_t LUIdx = P.first->second; |
| 2635 |
LSRUse &LU = Uses[LUIdx]; |
2635 |
LSRUse &LU = Uses[LUIdx]; |
| 2636 |
if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) |
2636 |
if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) |
| 2637 |
// Reuse this use. |
2637 |
// Reuse this use. |
| 2638 |
return std::make_pair(LUIdx, Offset); |
2638 |
return std::make_pair(LUIdx, Offset); |
| 2639 |
} |
2639 |
} |
| 2640 |
|
2640 |
|
| 2641 |
// Create a new use. |
2641 |
// Create a new use. |
| 2642 |
size_t LUIdx = Uses.size(); |
2642 |
size_t LUIdx = Uses.size(); |
| 2643 |
P.first->second = LUIdx; |
2643 |
P.first->second = LUIdx; |
| 2644 |
Uses.push_back(LSRUse(Kind, AccessTy)); |
2644 |
Uses.push_back(LSRUse(Kind, AccessTy)); |
| 2645 |
LSRUse &LU = Uses[LUIdx]; |
2645 |
LSRUse &LU = Uses[LUIdx]; |
| 2646 |
|
2646 |
|
| 2647 |
LU.MinOffset = Offset; |
2647 |
LU.MinOffset = Offset; |
| 2648 |
LU.MaxOffset = Offset; |
2648 |
LU.MaxOffset = Offset; |
| 2649 |
return std::make_pair(LUIdx, Offset); |
2649 |
return std::make_pair(LUIdx, Offset); |
| 2650 |
} |
2650 |
} |
| 2651 |
|
2651 |
|
| 2652 |
/// Delete the given use from the Uses list. |
2652 |
/// Delete the given use from the Uses list. |
| 2653 |
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { |
2653 |
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { |
| 2654 |
if (&LU != &Uses.back()) |
2654 |
if (&LU != &Uses.back()) |
| 2655 |
std::swap(LU, Uses.back()); |
2655 |
std::swap(LU, Uses.back()); |
| 2656 |
Uses.pop_back(); |
2656 |
Uses.pop_back(); |
| 2657 |
|
2657 |
|
| 2658 |
// Update RegUses. |
2658 |
// Update RegUses. |
| 2659 |
RegUses.swapAndDropUse(LUIdx, Uses.size()); |
2659 |
RegUses.swapAndDropUse(LUIdx, Uses.size()); |
| 2660 |
} |
2660 |
} |
| 2661 |
|
2661 |
|
| 2662 |
/// Look for a use distinct from OrigLU which is has a formula that has the same |
2662 |
/// Look for a use distinct from OrigLU which is has a formula that has the same |
| 2663 |
/// registers as the given formula. |
2663 |
/// registers as the given formula. |
| 2664 |
LSRUse * |
2664 |
LSRUse * |
| 2665 |
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, |
2665 |
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, |
| 2666 |
const LSRUse &OrigLU) { |
2666 |
const LSRUse &OrigLU) { |
| 2667 |
// Search all uses for the formula. This could be more clever. |
2667 |
// Search all uses for the formula. This could be more clever. |
| 2668 |
for (LSRUse &LU : Uses) { |
2668 |
for (LSRUse &LU : Uses) { |
| 2669 |
// Check whether this use is close enough to OrigLU, to see whether it's |
2669 |
// Check whether this use is close enough to OrigLU, to see whether it's |
| 2670 |
// worthwhile looking through its formulae. |
2670 |
// worthwhile looking through its formulae. |
| 2671 |
// Ignore ICmpZero uses because they may contain formulae generated by |
2671 |
// Ignore ICmpZero uses because they may contain formulae generated by |
| 2672 |
// GenerateICmpZeroScales, in which case adding fixup offsets may |
2672 |
// GenerateICmpZeroScales, in which case adding fixup offsets may |
| 2673 |
// be invalid. |
2673 |
// be invalid. |
| 2674 |
if (&LU != &OrigLU && |
2674 |
if (&LU != &OrigLU && |
| 2675 |
LU.Kind != LSRUse::ICmpZero && |
2675 |
LU.Kind != LSRUse::ICmpZero && |
| 2676 |
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && |
2676 |
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && |
| 2677 |
LU.WidestFixupType == OrigLU.WidestFixupType && |
2677 |
LU.WidestFixupType == OrigLU.WidestFixupType && |
| 2678 |
LU.HasFormulaWithSameRegs(OrigF)) { |
2678 |
LU.HasFormulaWithSameRegs(OrigF)) { |
| 2679 |
// Scan through this use's formulae. |
2679 |
// Scan through this use's formulae. |
| 2680 |
for (const Formula &F : LU.Formulae) { |
2680 |
for (const Formula &F : LU.Formulae) { |
| 2681 |
// Check to see if this formula has the same registers and symbols |
2681 |
// Check to see if this formula has the same registers and symbols |
| 2682 |
// as OrigF. |
2682 |
// as OrigF. |
| 2683 |
if (F.BaseRegs == OrigF.BaseRegs && |
2683 |
if (F.BaseRegs == OrigF.BaseRegs && |
| 2684 |
F.ScaledReg == OrigF.ScaledReg && |
2684 |
F.ScaledReg == OrigF.ScaledReg && |
| 2685 |
F.BaseGV == OrigF.BaseGV && |
2685 |
F.BaseGV == OrigF.BaseGV && |
| 2686 |
F.Scale == OrigF.Scale && |
2686 |
F.Scale == OrigF.Scale && |
| 2687 |
F.UnfoldedOffset == OrigF.UnfoldedOffset) { |
2687 |
F.UnfoldedOffset == OrigF.UnfoldedOffset) { |
| 2688 |
if (F.BaseOffset == 0) |
2688 |
if (F.BaseOffset == 0) |
| 2689 |
return &LU; |
2689 |
return &LU; |
| 2690 |
// This is the formula where all the registers and symbols matched; |
2690 |
// This is the formula where all the registers and symbols matched; |
| 2691 |
// there aren't going to be any others. Since we declined it, we |
2691 |
// there aren't going to be any others. Since we declined it, we |
| 2692 |
// can skip the rest of the formulae and proceed to the next LSRUse. |
2692 |
// can skip the rest of the formulae and proceed to the next LSRUse. |
| 2693 |
break; |
2693 |
break; |
| 2694 |
} |
2694 |
} |
| 2695 |
} |
2695 |
} |
| 2696 |
} |
2696 |
} |
| 2697 |
} |
2697 |
} |
| 2698 |
|
2698 |
|
| 2699 |
// Nothing looked good. |
2699 |
// Nothing looked good. |
| 2700 |
return nullptr; |
2700 |
return nullptr; |
| 2701 |
} |
2701 |
} |
| 2702 |
|
2702 |
|
| 2703 |
void LSRInstance::CollectInterestingTypesAndFactors() { |
2703 |
void LSRInstance::CollectInterestingTypesAndFactors() { |
| 2704 |
SmallSetVector Strides; |
2704 |
SmallSetVector Strides; |
| 2705 |
|
2705 |
|
| 2706 |
// Collect interesting types and strides. |
2706 |
// Collect interesting types and strides. |
| 2707 |
SmallVector Worklist; |
2707 |
SmallVector Worklist; |
| 2708 |
for (const IVStrideUse &U : IU) { |
2708 |
for (const IVStrideUse &U : IU) { |
| 2709 |
const SCEV *Expr = IU.getExpr(U); |
2709 |
const SCEV *Expr = IU.getExpr(U); |
| 2710 |
if (!Expr) |
2710 |
if (!Expr) |
| 2711 |
continue; |
2711 |
continue; |
| 2712 |
|
2712 |
|
| 2713 |
// Collect interesting types. |
2713 |
// Collect interesting types. |
| 2714 |
Types.insert(SE.getEffectiveSCEVType(Expr->getType())); |
2714 |
Types.insert(SE.getEffectiveSCEVType(Expr->getType())); |
| 2715 |
|
2715 |
|
| 2716 |
// Add strides for mentioned loops. |
2716 |
// Add strides for mentioned loops. |
| 2717 |
Worklist.push_back(Expr); |
2717 |
Worklist.push_back(Expr); |
| 2718 |
do { |
2718 |
do { |
| 2719 |
const SCEV *S = Worklist.pop_back_val(); |
2719 |
const SCEV *S = Worklist.pop_back_val(); |
| 2720 |
if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
2720 |
if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
| 2721 |
if (AR->getLoop() == L) |
2721 |
if (AR->getLoop() == L) |
| 2722 |
Strides.insert(AR->getStepRecurrence(SE)); |
2722 |
Strides.insert(AR->getStepRecurrence(SE)); |
| 2723 |
Worklist.push_back(AR->getStart()); |
2723 |
Worklist.push_back(AR->getStart()); |
| 2724 |
} else if (const SCEVAddExpr *Add = dyn_cast(S)) { |
2724 |
} else if (const SCEVAddExpr *Add = dyn_cast(S)) { |
| 2725 |
append_range(Worklist, Add->operands()); |
2725 |
append_range(Worklist, Add->operands()); |
| 2726 |
} |
2726 |
} |
| 2727 |
} while (!Worklist.empty()); |
2727 |
} while (!Worklist.empty()); |
| 2728 |
} |
2728 |
} |
| 2729 |
|
2729 |
|
| 2730 |
// Compute interesting factors from the set of interesting strides. |
2730 |
// Compute interesting factors from the set of interesting strides. |
| 2731 |
for (SmallSetVector::const_iterator |
2731 |
for (SmallSetVector::const_iterator |
| 2732 |
I = Strides.begin(), E = Strides.end(); I != E; ++I) |
2732 |
I = Strides.begin(), E = Strides.end(); I != E; ++I) |
| 2733 |
for (SmallSetVector::const_iterator NewStrideIter = |
2733 |
for (SmallSetVector::const_iterator NewStrideIter = |
| 2734 |
std::next(I); NewStrideIter != E; ++NewStrideIter) { |
2734 |
std::next(I); NewStrideIter != E; ++NewStrideIter) { |
| 2735 |
const SCEV *OldStride = *I; |
2735 |
const SCEV *OldStride = *I; |
| 2736 |
const SCEV *NewStride = *NewStrideIter; |
2736 |
const SCEV *NewStride = *NewStrideIter; |
| 2737 |
|
2737 |
|
| 2738 |
if (SE.getTypeSizeInBits(OldStride->getType()) != |
2738 |
if (SE.getTypeSizeInBits(OldStride->getType()) != |
| 2739 |
SE.getTypeSizeInBits(NewStride->getType())) { |
2739 |
SE.getTypeSizeInBits(NewStride->getType())) { |
| 2740 |
if (SE.getTypeSizeInBits(OldStride->getType()) > |
2740 |
if (SE.getTypeSizeInBits(OldStride->getType()) > |
| 2741 |
SE.getTypeSizeInBits(NewStride->getType())) |
2741 |
SE.getTypeSizeInBits(NewStride->getType())) |
| 2742 |
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); |
2742 |
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); |
| 2743 |
else |
2743 |
else |
| 2744 |
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); |
2744 |
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); |
| 2745 |
} |
2745 |
} |
| 2746 |
if (const SCEVConstant *Factor = |
2746 |
if (const SCEVConstant *Factor = |
| 2747 |
dyn_cast_or_null(getExactSDiv(NewStride, OldStride, |
2747 |
dyn_cast_or_null(getExactSDiv(NewStride, OldStride, |
| 2748 |
SE, true))) { |
2748 |
SE, true))) { |
| 2749 |
if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero()) |
2749 |
if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero()) |
| 2750 |
Factors.insert(Factor->getAPInt().getSExtValue()); |
2750 |
Factors.insert(Factor->getAPInt().getSExtValue()); |
| 2751 |
} else if (const SCEVConstant *Factor = |
2751 |
} else if (const SCEVConstant *Factor = |
| 2752 |
dyn_cast_or_null(getExactSDiv(OldStride, |
2752 |
dyn_cast_or_null(getExactSDiv(OldStride, |
| 2753 |
NewStride, |
2753 |
NewStride, |
| 2754 |
SE, true))) { |
2754 |
SE, true))) { |
| 2755 |
if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero()) |
2755 |
if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero()) |
| 2756 |
Factors.insert(Factor->getAPInt().getSExtValue()); |
2756 |
Factors.insert(Factor->getAPInt().getSExtValue()); |
| 2757 |
} |
2757 |
} |
| 2758 |
} |
2758 |
} |
| 2759 |
|
2759 |
|
| 2760 |
// If all uses use the same type, don't bother looking for truncation-based |
2760 |
// If all uses use the same type, don't bother looking for truncation-based |
| 2761 |
// reuse. |
2761 |
// reuse. |
| 2762 |
if (Types.size() == 1) |
2762 |
if (Types.size() == 1) |
| 2763 |
Types.clear(); |
2763 |
Types.clear(); |
| 2764 |
|
2764 |
|
| 2765 |
LLVM_DEBUG(print_factors_and_types(dbgs())); |
2765 |
LLVM_DEBUG(print_factors_and_types(dbgs())); |
| 2766 |
} |
2766 |
} |
| 2767 |
|
2767 |
|
| 2768 |
/// Helper for CollectChains that finds an IV operand (computed by an AddRec in |
2768 |
/// Helper for CollectChains that finds an IV operand (computed by an AddRec in |
| 2769 |
/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to |
2769 |
/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to |
| 2770 |
/// IVStrideUses, we could partially skip this. |
2770 |
/// IVStrideUses, we could partially skip this. |
| 2771 |
static User::op_iterator |
2771 |
static User::op_iterator |
| 2772 |
findIVOperand(User::op_iterator OI, User::op_iterator OE, |
2772 |
findIVOperand(User::op_iterator OI, User::op_iterator OE, |
| 2773 |
Loop *L, ScalarEvolution &SE) { |
2773 |
Loop *L, ScalarEvolution &SE) { |
| 2774 |
for(; OI != OE; ++OI) { |
2774 |
for(; OI != OE; ++OI) { |
| 2775 |
if (Instruction *Oper = dyn_cast(*OI)) { |
2775 |
if (Instruction *Oper = dyn_cast(*OI)) { |
| 2776 |
if (!SE.isSCEVable(Oper->getType())) |
2776 |
if (!SE.isSCEVable(Oper->getType())) |
| 2777 |
continue; |
2777 |
continue; |
| 2778 |
|
2778 |
|
| 2779 |
if (const SCEVAddRecExpr *AR = |
2779 |
if (const SCEVAddRecExpr *AR = |
| 2780 |
dyn_cast(SE.getSCEV(Oper))) { |
2780 |
dyn_cast(SE.getSCEV(Oper))) { |
| 2781 |
if (AR->getLoop() == L) |
2781 |
if (AR->getLoop() == L) |
| 2782 |
break; |
2782 |
break; |
| 2783 |
} |
2783 |
} |
| 2784 |
} |
2784 |
} |
| 2785 |
} |
2785 |
} |
| 2786 |
return OI; |
2786 |
return OI; |
| 2787 |
} |
2787 |
} |
| 2788 |
|
2788 |
|
| 2789 |
/// IVChain logic must consistently peek base TruncInst operands, so wrap it in |
2789 |
/// IVChain logic must consistently peek base TruncInst operands, so wrap it in |
| 2790 |
/// a convenient helper. |
2790 |
/// a convenient helper. |
| 2791 |
static Value *getWideOperand(Value *Oper) { |
2791 |
static Value *getWideOperand(Value *Oper) { |
| 2792 |
if (TruncInst *Trunc = dyn_cast(Oper)) |
2792 |
if (TruncInst *Trunc = dyn_cast(Oper)) |
| 2793 |
return Trunc->getOperand(0); |
2793 |
return Trunc->getOperand(0); |
| 2794 |
return Oper; |
2794 |
return Oper; |
| 2795 |
} |
2795 |
} |
| 2796 |
|
2796 |
|
| 2797 |
/// Return true if we allow an IV chain to include both types. |
2797 |
/// Return true if we allow an IV chain to include both types. |
| 2798 |
static bool isCompatibleIVType(Value *LVal, Value *RVal) { |
2798 |
static bool isCompatibleIVType(Value *LVal, Value *RVal) { |
| 2799 |
Type *LType = LVal->getType(); |
2799 |
Type *LType = LVal->getType(); |
| 2800 |
Type *RType = RVal->getType(); |
2800 |
Type *RType = RVal->getType(); |
| 2801 |
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() && |
2801 |
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() && |
| 2802 |
// Different address spaces means (possibly) |
2802 |
// Different address spaces means (possibly) |
| 2803 |
// different types of the pointer implementation, |
2803 |
// different types of the pointer implementation, |
| 2804 |
// e.g. i16 vs i32 so disallow that. |
2804 |
// e.g. i16 vs i32 so disallow that. |
| 2805 |
(LType->getPointerAddressSpace() == |
2805 |
(LType->getPointerAddressSpace() == |
| 2806 |
RType->getPointerAddressSpace())); |
2806 |
RType->getPointerAddressSpace())); |
| 2807 |
} |
2807 |
} |
| 2808 |
|
2808 |
|
| 2809 |
/// Return an approximation of this SCEV expression's "base", or NULL for any |
2809 |
/// Return an approximation of this SCEV expression's "base", or NULL for any |
| 2810 |
/// constant. Returning the expression itself is conservative. Returning a |
2810 |
/// constant. Returning the expression itself is conservative. Returning a |
| 2811 |
/// deeper subexpression is more precise and valid as long as it isn't less |
2811 |
/// deeper subexpression is more precise and valid as long as it isn't less |
| 2812 |
/// complex than another subexpression. For expressions involving multiple |
2812 |
/// complex than another subexpression. For expressions involving multiple |
| 2813 |
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids |
2813 |
/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids |
| 2814 |
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i], |
2814 |
/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i], |
| 2815 |
/// IVInc==b-a. |
2815 |
/// IVInc==b-a. |
| 2816 |
/// |
2816 |
/// |
| 2817 |
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost |
2817 |
/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost |
| 2818 |
/// SCEVUnknown, we simply return the rightmost SCEV operand. |
2818 |
/// SCEVUnknown, we simply return the rightmost SCEV operand. |
| 2819 |
static const SCEV *getExprBase(const SCEV *S) { |
2819 |
static const SCEV *getExprBase(const SCEV *S) { |
| 2820 |
switch (S->getSCEVType()) { |
2820 |
switch (S->getSCEVType()) { |
| 2821 |
default: // including scUnknown. |
2821 |
default: // including scUnknown. |
| 2822 |
return S; |
2822 |
return S; |
| 2823 |
case scConstant: |
2823 |
case scConstant: |
| 2824 |
case scVScale: |
2824 |
case scVScale: |
| 2825 |
return nullptr; |
2825 |
return nullptr; |
| 2826 |
case scTruncate: |
2826 |
case scTruncate: |
| 2827 |
return getExprBase(cast(S)->getOperand()); |
2827 |
return getExprBase(cast(S)->getOperand()); |
| 2828 |
case scZeroExtend: |
2828 |
case scZeroExtend: |
| 2829 |
return getExprBase(cast(S)->getOperand()); |
2829 |
return getExprBase(cast(S)->getOperand()); |
| 2830 |
case scSignExtend: |
2830 |
case scSignExtend: |
| 2831 |
return getExprBase(cast(S)->getOperand()); |
2831 |
return getExprBase(cast(S)->getOperand()); |
| 2832 |
case scAddExpr: { |
2832 |
case scAddExpr: { |
| 2833 |
// Skip over scaled operands (scMulExpr) to follow add operands as long as |
2833 |
// Skip over scaled operands (scMulExpr) to follow add operands as long as |
| 2834 |
// there's nothing more complex. |
2834 |
// there's nothing more complex. |
| 2835 |
// FIXME: not sure if we want to recognize negation. |
2835 |
// FIXME: not sure if we want to recognize negation. |
| 2836 |
const SCEVAddExpr *Add = cast(S); |
2836 |
const SCEVAddExpr *Add = cast(S); |
| 2837 |
for (const SCEV *SubExpr : reverse(Add->operands())) { |
2837 |
for (const SCEV *SubExpr : reverse(Add->operands())) { |
| 2838 |
if (SubExpr->getSCEVType() == scAddExpr) |
2838 |
if (SubExpr->getSCEVType() == scAddExpr) |
| 2839 |
return getExprBase(SubExpr); |
2839 |
return getExprBase(SubExpr); |
| 2840 |
|
2840 |
|
| 2841 |
if (SubExpr->getSCEVType() != scMulExpr) |
2841 |
if (SubExpr->getSCEVType() != scMulExpr) |
| 2842 |
return SubExpr; |
2842 |
return SubExpr; |
| 2843 |
} |
2843 |
} |
| 2844 |
return S; // all operands are scaled, be conservative. |
2844 |
return S; // all operands are scaled, be conservative. |
| 2845 |
} |
2845 |
} |
| 2846 |
case scAddRecExpr: |
2846 |
case scAddRecExpr: |
| 2847 |
return getExprBase(cast(S)->getStart()); |
2847 |
return getExprBase(cast(S)->getStart()); |
| 2848 |
} |
2848 |
} |
| 2849 |
llvm_unreachable("Unknown SCEV kind!"); |
2849 |
llvm_unreachable("Unknown SCEV kind!"); |
| 2850 |
} |
2850 |
} |
| 2851 |
|
2851 |
|
| 2852 |
/// Return true if the chain increment is profitable to expand into a loop |
2852 |
/// Return true if the chain increment is profitable to expand into a loop |
| 2853 |
/// invariant value, which may require its own register. A profitable chain |
2853 |
/// invariant value, which may require its own register. A profitable chain |
| 2854 |
/// increment will be an offset relative to the same base. We allow such offsets |
2854 |
/// increment will be an offset relative to the same base. We allow such offsets |
| 2855 |
/// to potentially be used as chain increment as long as it's not obviously |
2855 |
/// to potentially be used as chain increment as long as it's not obviously |
| 2856 |
/// expensive to expand using real instructions. |
2856 |
/// expensive to expand using real instructions. |
| 2857 |
bool IVChain::isProfitableIncrement(const SCEV *OperExpr, |
2857 |
bool IVChain::isProfitableIncrement(const SCEV *OperExpr, |
| 2858 |
const SCEV *IncExpr, |
2858 |
const SCEV *IncExpr, |
| 2859 |
ScalarEvolution &SE) { |
2859 |
ScalarEvolution &SE) { |
| 2860 |
// Aggressively form chains when -stress-ivchain. |
2860 |
// Aggressively form chains when -stress-ivchain. |
| 2861 |
if (StressIVChain) |
2861 |
if (StressIVChain) |
| 2862 |
return true; |
2862 |
return true; |
| 2863 |
|
2863 |
|
| 2864 |
// Do not replace a constant offset from IV head with a nonconstant IV |
2864 |
// Do not replace a constant offset from IV head with a nonconstant IV |
| 2865 |
// increment. |
2865 |
// increment. |
| 2866 |
if (!isa(IncExpr)) { |
2866 |
if (!isa(IncExpr)) { |
| 2867 |
const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand)); |
2867 |
const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand)); |
| 2868 |
if (isa(SE.getMinusSCEV(OperExpr, HeadExpr))) |
2868 |
if (isa(SE.getMinusSCEV(OperExpr, HeadExpr))) |
| 2869 |
return false; |
2869 |
return false; |
| 2870 |
} |
2870 |
} |
| 2871 |
|
2871 |
|
| 2872 |
SmallPtrSet Processed; |
2872 |
SmallPtrSet Processed; |
| 2873 |
return !isHighCostExpansion(IncExpr, Processed, SE); |
2873 |
return !isHighCostExpansion(IncExpr, Processed, SE); |
| 2874 |
} |
2874 |
} |
| 2875 |
|
2875 |
|
| 2876 |
/// Return true if the number of registers needed for the chain is estimated to |
2876 |
/// Return true if the number of registers needed for the chain is estimated to |
| 2877 |
/// be less than the number required for the individual IV users. First prohibit |
2877 |
/// be less than the number required for the individual IV users. First prohibit |
| 2878 |
/// any IV users that keep the IV live across increments (the Users set should |
2878 |
/// any IV users that keep the IV live across increments (the Users set should |
| 2879 |
/// be empty). Next count the number and type of increments in the chain. |
2879 |
/// be empty). Next count the number and type of increments in the chain. |
| 2880 |
/// |
2880 |
/// |
| 2881 |
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't |
2881 |
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't |
| 2882 |
/// effectively use postinc addressing modes. Only consider it profitable it the |
2882 |
/// effectively use postinc addressing modes. Only consider it profitable it the |
| 2883 |
/// increments can be computed in fewer registers when chained. |
2883 |
/// increments can be computed in fewer registers when chained. |
| 2884 |
/// |
2884 |
/// |
| 2885 |
/// TODO: Consider IVInc free if it's already used in another chains. |
2885 |
/// TODO: Consider IVInc free if it's already used in another chains. |
| 2886 |
static bool isProfitableChain(IVChain &Chain, |
2886 |
static bool isProfitableChain(IVChain &Chain, |
| 2887 |
SmallPtrSetImpl &Users, |
2887 |
SmallPtrSetImpl &Users, |
| 2888 |
ScalarEvolution &SE, |
2888 |
ScalarEvolution &SE, |
| 2889 |
const TargetTransformInfo &TTI) { |
2889 |
const TargetTransformInfo &TTI) { |
| 2890 |
if (StressIVChain) |
2890 |
if (StressIVChain) |
| 2891 |
return true; |
2891 |
return true; |
| 2892 |
|
2892 |
|
| 2893 |
if (!Chain.hasIncs()) |
2893 |
if (!Chain.hasIncs()) |
| 2894 |
return false; |
2894 |
return false; |
| 2895 |
|
2895 |
|
| 2896 |
if (!Users.empty()) { |
2896 |
if (!Users.empty()) { |
| 2897 |
LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n"; |
2897 |
LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n"; |
| 2898 |
for (Instruction *Inst |
2898 |
for (Instruction *Inst |
| 2899 |
: Users) { dbgs() << " " << *Inst << "\n"; }); |
2899 |
: Users) { dbgs() << " " << *Inst << "\n"; }); |
| 2900 |
return false; |
2900 |
return false; |
| 2901 |
} |
2901 |
} |
| 2902 |
assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); |
2902 |
assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); |
| 2903 |
|
2903 |
|
| 2904 |
// The chain itself may require a register, so intialize cost to 1. |
2904 |
// The chain itself may require a register, so intialize cost to 1. |
| 2905 |
int cost = 1; |
2905 |
int cost = 1; |
| 2906 |
|
2906 |
|
| 2907 |
// A complete chain likely eliminates the need for keeping the original IV in |
2907 |
// A complete chain likely eliminates the need for keeping the original IV in |
| 2908 |
// a register. LSR does not currently know how to form a complete chain unless |
2908 |
// a register. LSR does not currently know how to form a complete chain unless |
| 2909 |
// the header phi already exists. |
2909 |
// the header phi already exists. |
| 2910 |
if (isa(Chain.tailUserInst()) |
2910 |
if (isa(Chain.tailUserInst()) |
| 2911 |
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { |
2911 |
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { |
| 2912 |
--cost; |
2912 |
--cost; |
| 2913 |
} |
2913 |
} |
| 2914 |
const SCEV *LastIncExpr = nullptr; |
2914 |
const SCEV *LastIncExpr = nullptr; |
| 2915 |
unsigned NumConstIncrements = 0; |
2915 |
unsigned NumConstIncrements = 0; |
| 2916 |
unsigned NumVarIncrements = 0; |
2916 |
unsigned NumVarIncrements = 0; |
| 2917 |
unsigned NumReusedIncrements = 0; |
2917 |
unsigned NumReusedIncrements = 0; |
| 2918 |
|
2918 |
|
| 2919 |
if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst)) |
2919 |
if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst)) |
| 2920 |
return true; |
2920 |
return true; |
| 2921 |
|
2921 |
|
| 2922 |
for (const IVInc &Inc : Chain) { |
2922 |
for (const IVInc &Inc : Chain) { |
| 2923 |
if (TTI.isProfitableLSRChainElement(Inc.UserInst)) |
2923 |
if (TTI.isProfitableLSRChainElement(Inc.UserInst)) |
| 2924 |
return true; |
2924 |
return true; |
| 2925 |
if (Inc.IncExpr->isZero()) |
2925 |
if (Inc.IncExpr->isZero()) |
| 2926 |
continue; |
2926 |
continue; |
| 2927 |
|
2927 |
|
| 2928 |
// Incrementing by zero or some constant is neutral. We assume constants can |
2928 |
// Incrementing by zero or some constant is neutral. We assume constants can |
| 2929 |
// be folded into an addressing mode or an add's immediate operand. |
2929 |
// be folded into an addressing mode or an add's immediate operand. |
| 2930 |
if (isa(Inc.IncExpr)) { |
2930 |
if (isa(Inc.IncExpr)) { |
| 2931 |
++NumConstIncrements; |
2931 |
++NumConstIncrements; |
| 2932 |
continue; |
2932 |
continue; |
| 2933 |
} |
2933 |
} |
| 2934 |
|
2934 |
|
| 2935 |
if (Inc.IncExpr == LastIncExpr) |
2935 |
if (Inc.IncExpr == LastIncExpr) |
| 2936 |
++NumReusedIncrements; |
2936 |
++NumReusedIncrements; |
| 2937 |
else |
2937 |
else |
| 2938 |
++NumVarIncrements; |
2938 |
++NumVarIncrements; |
| 2939 |
|
2939 |
|
| 2940 |
LastIncExpr = Inc.IncExpr; |
2940 |
LastIncExpr = Inc.IncExpr; |
| 2941 |
} |
2941 |
} |
| 2942 |
// An IV chain with a single increment is handled by LSR's postinc |
2942 |
// An IV chain with a single increment is handled by LSR's postinc |
| 2943 |
// uses. However, a chain with multiple increments requires keeping the IV's |
2943 |
// uses. However, a chain with multiple increments requires keeping the IV's |
| 2944 |
// value live longer than it needs to be if chained. |
2944 |
// value live longer than it needs to be if chained. |
| 2945 |
if (NumConstIncrements > 1) |
2945 |
if (NumConstIncrements > 1) |
| 2946 |
--cost; |
2946 |
--cost; |
| 2947 |
|
2947 |
|
| 2948 |
// Materializing increment expressions in the preheader that didn't exist in |
2948 |
// Materializing increment expressions in the preheader that didn't exist in |
| 2949 |
// the original code may cost a register. For example, sign-extended array |
2949 |
// the original code may cost a register. For example, sign-extended array |
| 2950 |
// indices can produce ridiculous increments like this: |
2950 |
// indices can produce ridiculous increments like this: |
| 2951 |
// IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) |
2951 |
// IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) |
| 2952 |
cost += NumVarIncrements; |
2952 |
cost += NumVarIncrements; |
| 2953 |
|
2953 |
|
| 2954 |
// Reusing variable increments likely saves a register to hold the multiple of |
2954 |
// Reusing variable increments likely saves a register to hold the multiple of |
| 2955 |
// the stride. |
2955 |
// the stride. |
| 2956 |
cost -= NumReusedIncrements; |
2956 |
cost -= NumReusedIncrements; |
| 2957 |
|
2957 |
|
| 2958 |
LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost |
2958 |
LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost |
| 2959 |
<< "\n"); |
2959 |
<< "\n"); |
| 2960 |
|
2960 |
|
| 2961 |
return cost < 0; |
2961 |
return cost < 0; |
| 2962 |
} |
2962 |
} |
| 2963 |
|
2963 |
|
| 2964 |
/// Add this IV user to an existing chain or make it the head of a new chain. |
2964 |
/// Add this IV user to an existing chain or make it the head of a new chain. |
| 2965 |
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, |
2965 |
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, |
| 2966 |
SmallVectorImpl &ChainUsersVec) { |
2966 |
SmallVectorImpl &ChainUsersVec) { |
| 2967 |
// When IVs are used as types of varying widths, they are generally converted |
2967 |
// When IVs are used as types of varying widths, they are generally converted |
| 2968 |
// to a wider type with some uses remaining narrow under a (free) trunc. |
2968 |
// to a wider type with some uses remaining narrow under a (free) trunc. |
| 2969 |
Value *const NextIV = getWideOperand(IVOper); |
2969 |
Value *const NextIV = getWideOperand(IVOper); |
| 2970 |
const SCEV *const OperExpr = SE.getSCEV(NextIV); |
2970 |
const SCEV *const OperExpr = SE.getSCEV(NextIV); |
| 2971 |
const SCEV *const OperExprBase = getExprBase(OperExpr); |
2971 |
const SCEV *const OperExprBase = getExprBase(OperExpr); |
| 2972 |
|
2972 |
|
| 2973 |
// Visit all existing chains. Check if its IVOper can be computed as a |
2973 |
// Visit all existing chains. Check if its IVOper can be computed as a |
| 2974 |
// profitable loop invariant increment from the last link in the Chain. |
2974 |
// profitable loop invariant increment from the last link in the Chain. |
| 2975 |
unsigned ChainIdx = 0, NChains = IVChainVec.size(); |
2975 |
unsigned ChainIdx = 0, NChains = IVChainVec.size(); |
| 2976 |
const SCEV *LastIncExpr = nullptr; |
2976 |
const SCEV *LastIncExpr = nullptr; |
| 2977 |
for (; ChainIdx < NChains; ++ChainIdx) { |
2977 |
for (; ChainIdx < NChains; ++ChainIdx) { |
| 2978 |
IVChain &Chain = IVChainVec[ChainIdx]; |
2978 |
IVChain &Chain = IVChainVec[ChainIdx]; |
| 2979 |
|
2979 |
|
| 2980 |
// Prune the solution space aggressively by checking that both IV operands |
2980 |
// Prune the solution space aggressively by checking that both IV operands |
| 2981 |
// are expressions that operate on the same unscaled SCEVUnknown. This |
2981 |
// are expressions that operate on the same unscaled SCEVUnknown. This |
| 2982 |
// "base" will be canceled by the subsequent getMinusSCEV call. Checking |
2982 |
// "base" will be canceled by the subsequent getMinusSCEV call. Checking |
| 2983 |
// first avoids creating extra SCEV expressions. |
2983 |
// first avoids creating extra SCEV expressions. |
| 2984 |
if (!StressIVChain && Chain.ExprBase != OperExprBase) |
2984 |
if (!StressIVChain && Chain.ExprBase != OperExprBase) |
| 2985 |
continue; |
2985 |
continue; |
| 2986 |
|
2986 |
|
| 2987 |
Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand); |
2987 |
Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand); |
| 2988 |
if (!isCompatibleIVType(PrevIV, NextIV)) |
2988 |
if (!isCompatibleIVType(PrevIV, NextIV)) |
| 2989 |
continue; |
2989 |
continue; |
| 2990 |
|
2990 |
|
| 2991 |
// A phi node terminates a chain. |
2991 |
// A phi node terminates a chain. |
| 2992 |
if (isa(UserInst) && isa(Chain.tailUserInst())) |
2992 |
if (isa(UserInst) && isa(Chain.tailUserInst())) |
| 2993 |
continue; |
2993 |
continue; |
| 2994 |
|
2994 |
|
| 2995 |
// The increment must be loop-invariant so it can be kept in a register. |
2995 |
// The increment must be loop-invariant so it can be kept in a register. |
| 2996 |
const SCEV *PrevExpr = SE.getSCEV(PrevIV); |
2996 |
const SCEV *PrevExpr = SE.getSCEV(PrevIV); |
| 2997 |
const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); |
2997 |
const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); |
| 2998 |
if (isa(IncExpr) || !SE.isLoopInvariant(IncExpr, L)) |
2998 |
if (isa(IncExpr) || !SE.isLoopInvariant(IncExpr, L)) |
| 2999 |
continue; |
2999 |
continue; |
| 3000 |
|
3000 |
|
| 3001 |
if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) { |
3001 |
if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) { |
| 3002 |
LastIncExpr = IncExpr; |
3002 |
LastIncExpr = IncExpr; |
| 3003 |
break; |
3003 |
break; |
| 3004 |
} |
3004 |
} |
| 3005 |
} |
3005 |
} |
| 3006 |
// If we haven't found a chain, create a new one, unless we hit the max. Don't |
3006 |
// If we haven't found a chain, create a new one, unless we hit the max. Don't |
| 3007 |
// bother for phi nodes, because they must be last in the chain. |
3007 |
// bother for phi nodes, because they must be last in the chain. |
| 3008 |
if (ChainIdx == NChains) { |
3008 |
if (ChainIdx == NChains) { |
| 3009 |
if (isa(UserInst)) |
3009 |
if (isa(UserInst)) |
| 3010 |
return; |
3010 |
return; |
| 3011 |
if (NChains >= MaxChains && !StressIVChain) { |
3011 |
if (NChains >= MaxChains && !StressIVChain) { |
| 3012 |
LLVM_DEBUG(dbgs() << "IV Chain Limit\n"); |
3012 |
LLVM_DEBUG(dbgs() << "IV Chain Limit\n"); |
| 3013 |
return; |
3013 |
return; |
| 3014 |
} |
3014 |
} |
| 3015 |
LastIncExpr = OperExpr; |
3015 |
LastIncExpr = OperExpr; |
| 3016 |
// IVUsers may have skipped over sign/zero extensions. We don't currently |
3016 |
// IVUsers may have skipped over sign/zero extensions. We don't currently |
| 3017 |
// attempt to form chains involving extensions unless they can be hoisted |
3017 |
// attempt to form chains involving extensions unless they can be hoisted |
| 3018 |
// into this loop's AddRec. |
3018 |
// into this loop's AddRec. |
| 3019 |
if (!isa(LastIncExpr)) |
3019 |
if (!isa(LastIncExpr)) |
| 3020 |
return; |
3020 |
return; |
| 3021 |
++NChains; |
3021 |
++NChains; |
| 3022 |
IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr), |
3022 |
IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr), |
| 3023 |
OperExprBase)); |
3023 |
OperExprBase)); |
| 3024 |
ChainUsersVec.resize(NChains); |
3024 |
ChainUsersVec.resize(NChains); |
| 3025 |
LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst |
3025 |
LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst |
| 3026 |
<< ") IV=" << *LastIncExpr << "\n"); |
3026 |
<< ") IV=" << *LastIncExpr << "\n"); |
| 3027 |
} else { |
3027 |
} else { |
| 3028 |
LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst |
3028 |
LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst |
| 3029 |
<< ") IV+" << *LastIncExpr << "\n"); |
3029 |
<< ") IV+" << *LastIncExpr << "\n"); |
| 3030 |
// Add this IV user to the end of the chain. |
3030 |
// Add this IV user to the end of the chain. |
| 3031 |
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); |
3031 |
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); |
| 3032 |
} |
3032 |
} |
| 3033 |
IVChain &Chain = IVChainVec[ChainIdx]; |
3033 |
IVChain &Chain = IVChainVec[ChainIdx]; |
| 3034 |
|
3034 |
|
| 3035 |
SmallPtrSet &NearUsers = ChainUsersVec[ChainIdx].NearUsers; |
3035 |
SmallPtrSet &NearUsers = ChainUsersVec[ChainIdx].NearUsers; |
| 3036 |
// This chain's NearUsers become FarUsers. |
3036 |
// This chain's NearUsers become FarUsers. |
| 3037 |
if (!LastIncExpr->isZero()) { |
3037 |
if (!LastIncExpr->isZero()) { |
| 3038 |
ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(), |
3038 |
ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(), |
| 3039 |
NearUsers.end()); |
3039 |
NearUsers.end()); |
| 3040 |
NearUsers.clear(); |
3040 |
NearUsers.clear(); |
| 3041 |
} |
3041 |
} |
| 3042 |
|
3042 |
|
| 3043 |
// All other uses of IVOperand become near uses of the chain. |
3043 |
// All other uses of IVOperand become near uses of the chain. |
| 3044 |
// We currently ignore intermediate values within SCEV expressions, assuming |
3044 |
// We currently ignore intermediate values within SCEV expressions, assuming |
| 3045 |
// they will eventually be used be the current chain, or can be computed |
3045 |
// they will eventually be used be the current chain, or can be computed |
| 3046 |
// from one of the chain increments. To be more precise we could |
3046 |
// from one of the chain increments. To be more precise we could |
| 3047 |
// transitively follow its user and only add leaf IV users to the set. |
3047 |
// transitively follow its user and only add leaf IV users to the set. |
| 3048 |
for (User *U : IVOper->users()) { |
3048 |
for (User *U : IVOper->users()) { |
| 3049 |
Instruction *OtherUse = dyn_cast(U); |
3049 |
Instruction *OtherUse = dyn_cast(U); |
| 3050 |
if (!OtherUse) |
3050 |
if (!OtherUse) |
| 3051 |
continue; |
3051 |
continue; |
| 3052 |
// Uses in the chain will no longer be uses if the chain is formed. |
3052 |
// Uses in the chain will no longer be uses if the chain is formed. |
| 3053 |
// Include the head of the chain in this iteration (not Chain.begin()). |
3053 |
// Include the head of the chain in this iteration (not Chain.begin()). |
| 3054 |
IVChain::const_iterator IncIter = Chain.Incs.begin(); |
3054 |
IVChain::const_iterator IncIter = Chain.Incs.begin(); |
| 3055 |
IVChain::const_iterator IncEnd = Chain.Incs.end(); |
3055 |
IVChain::const_iterator IncEnd = Chain.Incs.end(); |
| 3056 |
for( ; IncIter != IncEnd; ++IncIter) { |
3056 |
for( ; IncIter != IncEnd; ++IncIter) { |
| 3057 |
if (IncIter->UserInst == OtherUse) |
3057 |
if (IncIter->UserInst == OtherUse) |
| 3058 |
break; |
3058 |
break; |
| 3059 |
} |
3059 |
} |
| 3060 |
if (IncIter != IncEnd) |
3060 |
if (IncIter != IncEnd) |
| 3061 |
continue; |
3061 |
continue; |
| 3062 |
|
3062 |
|
| 3063 |
if (SE.isSCEVable(OtherUse->getType()) |
3063 |
if (SE.isSCEVable(OtherUse->getType()) |
| 3064 |
&& !isa(SE.getSCEV(OtherUse)) |
3064 |
&& !isa(SE.getSCEV(OtherUse)) |
| 3065 |
&& IU.isIVUserOrOperand(OtherUse)) { |
3065 |
&& IU.isIVUserOrOperand(OtherUse)) { |
| 3066 |
continue; |
3066 |
continue; |
| 3067 |
} |
3067 |
} |
| 3068 |
NearUsers.insert(OtherUse); |
3068 |
NearUsers.insert(OtherUse); |
| 3069 |
} |
3069 |
} |
| 3070 |
|
3070 |
|
| 3071 |
// Since this user is part of the chain, it's no longer considered a use |
3071 |
// Since this user is part of the chain, it's no longer considered a use |
| 3072 |
// of the chain. |
3072 |
// of the chain. |
| 3073 |
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); |
3073 |
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); |
| 3074 |
} |
3074 |
} |
| 3075 |
|
3075 |
|
| 3076 |
/// Populate the vector of Chains. |
3076 |
/// Populate the vector of Chains. |
| 3077 |
/// |
3077 |
/// |
| 3078 |
/// This decreases ILP at the architecture level. Targets with ample registers, |
3078 |
/// This decreases ILP at the architecture level. Targets with ample registers, |
| 3079 |
/// multiple memory ports, and no register renaming probably don't want |
3079 |
/// multiple memory ports, and no register renaming probably don't want |
| 3080 |
/// this. However, such targets should probably disable LSR altogether. |
3080 |
/// this. However, such targets should probably disable LSR altogether. |
| 3081 |
/// |
3081 |
/// |
| 3082 |
/// The job of LSR is to make a reasonable choice of induction variables across |
3082 |
/// The job of LSR is to make a reasonable choice of induction variables across |
| 3083 |
/// the loop. Subsequent passes can easily "unchain" computation exposing more |
3083 |
/// the loop. Subsequent passes can easily "unchain" computation exposing more |
| 3084 |
/// ILP *within the loop* if the target wants it. |
3084 |
/// ILP *within the loop* if the target wants it. |
| 3085 |
/// |
3085 |
/// |
| 3086 |
/// Finding the best IV chain is potentially a scheduling problem. Since LSR |
3086 |
/// Finding the best IV chain is potentially a scheduling problem. Since LSR |
| 3087 |
/// will not reorder memory operations, it will recognize this as a chain, but |
3087 |
/// will not reorder memory operations, it will recognize this as a chain, but |
| 3088 |
/// will generate redundant IV increments. Ideally this would be corrected later |
3088 |
/// will generate redundant IV increments. Ideally this would be corrected later |
| 3089 |
/// by a smart scheduler: |
3089 |
/// by a smart scheduler: |
| 3090 |
/// = A[i] |
3090 |
/// = A[i] |
| 3091 |
/// = A[i+x] |
3091 |
/// = A[i+x] |
| 3092 |
/// A[i] = |
3092 |
/// A[i] = |
| 3093 |
/// A[i+x] = |
3093 |
/// A[i+x] = |
| 3094 |
/// |
3094 |
/// |
| 3095 |
/// TODO: Walk the entire domtree within this loop, not just the path to the |
3095 |
/// TODO: Walk the entire domtree within this loop, not just the path to the |
| 3096 |
/// loop latch. This will discover chains on side paths, but requires |
3096 |
/// loop latch. This will discover chains on side paths, but requires |
| 3097 |
/// maintaining multiple copies of the Chains state. |
3097 |
/// maintaining multiple copies of the Chains state. |
| 3098 |
void LSRInstance::CollectChains() { |
3098 |
void LSRInstance::CollectChains() { |
| 3099 |
LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n"); |
3099 |
LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n"); |
| 3100 |
SmallVector ChainUsersVec; |
3100 |
SmallVector ChainUsersVec; |
| 3101 |
|
3101 |
|
| 3102 |
SmallVector LatchPath; |
3102 |
SmallVector LatchPath; |
| 3103 |
BasicBlock *LoopHeader = L->getHeader(); |
3103 |
BasicBlock *LoopHeader = L->getHeader(); |
| 3104 |
for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch()); |
3104 |
for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch()); |
| 3105 |
Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) { |
3105 |
Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) { |
| 3106 |
LatchPath.push_back(Rung->getBlock()); |
3106 |
LatchPath.push_back(Rung->getBlock()); |
| 3107 |
} |
3107 |
} |
| 3108 |
LatchPath.push_back(LoopHeader); |
3108 |
LatchPath.push_back(LoopHeader); |
| 3109 |
|
3109 |
|
| 3110 |
// Walk the instruction stream from the loop header to the loop latch. |
3110 |
// Walk the instruction stream from the loop header to the loop latch. |
| 3111 |
for (BasicBlock *BB : reverse(LatchPath)) { |
3111 |
for (BasicBlock *BB : reverse(LatchPath)) { |
| 3112 |
for (Instruction &I : *BB) { |
3112 |
for (Instruction &I : *BB) { |
| 3113 |
// Skip instructions that weren't seen by IVUsers analysis. |
3113 |
// Skip instructions that weren't seen by IVUsers analysis. |
| 3114 |
if (isa(I) || !IU.isIVUserOrOperand(&I)) |
3114 |
if (isa(I) || !IU.isIVUserOrOperand(&I)) |
| 3115 |
continue; |
3115 |
continue; |
| 3116 |
|
3116 |
|
| 3117 |
// Ignore users that are part of a SCEV expression. This way we only |
3117 |
// Ignore users that are part of a SCEV expression. This way we only |
| 3118 |
// consider leaf IV Users. This effectively rediscovers a portion of |
3118 |
// consider leaf IV Users. This effectively rediscovers a portion of |
| 3119 |
// IVUsers analysis but in program order this time. |
3119 |
// IVUsers analysis but in program order this time. |
| 3120 |
if (SE.isSCEVable(I.getType()) && !isa(SE.getSCEV(&I))) |
3120 |
if (SE.isSCEVable(I.getType()) && !isa(SE.getSCEV(&I))) |
| 3121 |
continue; |
3121 |
continue; |
| 3122 |
|
3122 |
|
| 3123 |
// Remove this instruction from any NearUsers set it may be in. |
3123 |
// Remove this instruction from any NearUsers set it may be in. |
| 3124 |
for (unsigned ChainIdx = 0, NChains = IVChainVec.size(); |
3124 |
for (unsigned ChainIdx = 0, NChains = IVChainVec.size(); |
| 3125 |
ChainIdx < NChains; ++ChainIdx) { |
3125 |
ChainIdx < NChains; ++ChainIdx) { |
| 3126 |
ChainUsersVec[ChainIdx].NearUsers.erase(&I); |
3126 |
ChainUsersVec[ChainIdx].NearUsers.erase(&I); |
| 3127 |
} |
3127 |
} |
| 3128 |
// Search for operands that can be chained. |
3128 |
// Search for operands that can be chained. |
| 3129 |
SmallPtrSet UniqueOperands; |
3129 |
SmallPtrSet UniqueOperands; |
| 3130 |
User::op_iterator IVOpEnd = I.op_end(); |
3130 |
User::op_iterator IVOpEnd = I.op_end(); |
| 3131 |
User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE); |
3131 |
User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE); |
| 3132 |
while (IVOpIter != IVOpEnd) { |
3132 |
while (IVOpIter != IVOpEnd) { |
| 3133 |
Instruction *IVOpInst = cast(*IVOpIter); |
3133 |
Instruction *IVOpInst = cast(*IVOpIter); |
| 3134 |
if (UniqueOperands.insert(IVOpInst).second) |
3134 |
if (UniqueOperands.insert(IVOpInst).second) |
| 3135 |
ChainInstruction(&I, IVOpInst, ChainUsersVec); |
3135 |
ChainInstruction(&I, IVOpInst, ChainUsersVec); |
| 3136 |
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); |
3136 |
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); |
| 3137 |
} |
3137 |
} |
| 3138 |
} // Continue walking down the instructions. |
3138 |
} // Continue walking down the instructions. |
| 3139 |
} // Continue walking down the domtree. |
3139 |
} // Continue walking down the domtree. |
| 3140 |
// Visit phi backedges to determine if the chain can generate the IV postinc. |
3140 |
// Visit phi backedges to determine if the chain can generate the IV postinc. |
| 3141 |
for (PHINode &PN : L->getHeader()->phis()) { |
3141 |
for (PHINode &PN : L->getHeader()->phis()) { |
| 3142 |
if (!SE.isSCEVable(PN.getType())) |
3142 |
if (!SE.isSCEVable(PN.getType())) |
| 3143 |
continue; |
3143 |
continue; |
| 3144 |
|
3144 |
|
| 3145 |
Instruction *IncV = |
3145 |
Instruction *IncV = |
| 3146 |
dyn_cast(PN.getIncomingValueForBlock(L->getLoopLatch())); |
3146 |
dyn_cast(PN.getIncomingValueForBlock(L->getLoopLatch())); |
| 3147 |
if (IncV) |
3147 |
if (IncV) |
| 3148 |
ChainInstruction(&PN, IncV, ChainUsersVec); |
3148 |
ChainInstruction(&PN, IncV, ChainUsersVec); |
| 3149 |
} |
3149 |
} |
| 3150 |
// Remove any unprofitable chains. |
3150 |
// Remove any unprofitable chains. |
| 3151 |
unsigned ChainIdx = 0; |
3151 |
unsigned ChainIdx = 0; |
| 3152 |
for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); |
3152 |
for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); |
| 3153 |
UsersIdx < NChains; ++UsersIdx) { |
3153 |
UsersIdx < NChains; ++UsersIdx) { |
| 3154 |
if (!isProfitableChain(IVChainVec[UsersIdx], |
3154 |
if (!isProfitableChain(IVChainVec[UsersIdx], |
| 3155 |
ChainUsersVec[UsersIdx].FarUsers, SE, TTI)) |
3155 |
ChainUsersVec[UsersIdx].FarUsers, SE, TTI)) |
| 3156 |
continue; |
3156 |
continue; |
| 3157 |
// Preserve the chain at UsesIdx. |
3157 |
// Preserve the chain at UsesIdx. |
| 3158 |
if (ChainIdx != UsersIdx) |
3158 |
if (ChainIdx != UsersIdx) |
| 3159 |
IVChainVec[ChainIdx] = IVChainVec[UsersIdx]; |
3159 |
IVChainVec[ChainIdx] = IVChainVec[UsersIdx]; |
| 3160 |
FinalizeChain(IVChainVec[ChainIdx]); |
3160 |
FinalizeChain(IVChainVec[ChainIdx]); |
| 3161 |
++ChainIdx; |
3161 |
++ChainIdx; |
| 3162 |
} |
3162 |
} |
| 3163 |
IVChainVec.resize(ChainIdx); |
3163 |
IVChainVec.resize(ChainIdx); |
| 3164 |
} |
3164 |
} |
| 3165 |
|
3165 |
|
| 3166 |
void LSRInstance::FinalizeChain(IVChain &Chain) { |
3166 |
void LSRInstance::FinalizeChain(IVChain &Chain) { |
| 3167 |
assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); |
3167 |
assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); |
| 3168 |
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n"); |
3168 |
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n"); |
| 3169 |
|
3169 |
|
| 3170 |
for (const IVInc &Inc : Chain) { |
3170 |
for (const IVInc &Inc : Chain) { |
| 3171 |
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n"); |
3171 |
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n"); |
| 3172 |
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand); |
3172 |
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand); |
| 3173 |
assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand"); |
3173 |
assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand"); |
| 3174 |
IVIncSet.insert(UseI); |
3174 |
IVIncSet.insert(UseI); |
| 3175 |
} |
3175 |
} |
| 3176 |
} |
3176 |
} |
| 3177 |
|
3177 |
|
| 3178 |
/// Return true if the IVInc can be folded into an addressing mode. |
3178 |
/// Return true if the IVInc can be folded into an addressing mode. |
| 3179 |
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, |
3179 |
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, |
| 3180 |
Value *Operand, const TargetTransformInfo &TTI) { |
3180 |
Value *Operand, const TargetTransformInfo &TTI) { |
| 3181 |
const SCEVConstant *IncConst = dyn_cast(IncExpr); |
3181 |
const SCEVConstant *IncConst = dyn_cast(IncExpr); |
| 3182 |
if (!IncConst || !isAddressUse(TTI, UserInst, Operand)) |
3182 |
if (!IncConst || !isAddressUse(TTI, UserInst, Operand)) |
| 3183 |
return false; |
3183 |
return false; |
| 3184 |
|
3184 |
|
| 3185 |
if (IncConst->getAPInt().getSignificantBits() > 64) |
3185 |
if (IncConst->getAPInt().getSignificantBits() > 64) |
| 3186 |
return false; |
3186 |
return false; |
| 3187 |
|
3187 |
|
| 3188 |
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand); |
3188 |
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand); |
| 3189 |
int64_t IncOffset = IncConst->getValue()->getSExtValue(); |
3189 |
int64_t IncOffset = IncConst->getValue()->getSExtValue(); |
| 3190 |
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, |
3190 |
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, |
| 3191 |
IncOffset, /*HasBaseReg=*/false)) |
3191 |
IncOffset, /*HasBaseReg=*/false)) |
| 3192 |
return false; |
3192 |
return false; |
| 3193 |
|
3193 |
|
| 3194 |
return true; |
3194 |
return true; |
| 3195 |
} |
3195 |
} |
| 3196 |
|
3196 |
|
| 3197 |
/// Generate an add or subtract for each IVInc in a chain to materialize the IV |
3197 |
/// Generate an add or subtract for each IVInc in a chain to materialize the IV |
| 3198 |
/// user's operand from the previous IV user's operand. |
3198 |
/// user's operand from the previous IV user's operand. |
| 3199 |
void LSRInstance::GenerateIVChain(const IVChain &Chain, |
3199 |
void LSRInstance::GenerateIVChain(const IVChain &Chain, |
| 3200 |
SmallVectorImpl &DeadInsts) { |
3200 |
SmallVectorImpl &DeadInsts) { |
| 3201 |
// Find the new IVOperand for the head of the chain. It may have been replaced |
3201 |
// Find the new IVOperand for the head of the chain. It may have been replaced |
| 3202 |
// by LSR. |
3202 |
// by LSR. |
| 3203 |
const IVInc &Head = Chain.Incs[0]; |
3203 |
const IVInc &Head = Chain.Incs[0]; |
| 3204 |
User::op_iterator IVOpEnd = Head.UserInst->op_end(); |
3204 |
User::op_iterator IVOpEnd = Head.UserInst->op_end(); |
| 3205 |
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user. |
3205 |
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user. |
| 3206 |
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), |
3206 |
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), |
| 3207 |
IVOpEnd, L, SE); |
3207 |
IVOpEnd, L, SE); |
| 3208 |
Value *IVSrc = nullptr; |
3208 |
Value *IVSrc = nullptr; |
| 3209 |
while (IVOpIter != IVOpEnd) { |
3209 |
while (IVOpIter != IVOpEnd) { |
| 3210 |
IVSrc = getWideOperand(*IVOpIter); |
3210 |
IVSrc = getWideOperand(*IVOpIter); |
| 3211 |
|
3211 |
|
| 3212 |
// If this operand computes the expression that the chain needs, we may use |
3212 |
// If this operand computes the expression that the chain needs, we may use |
| 3213 |
// it. (Check this after setting IVSrc which is used below.) |
3213 |
// it. (Check this after setting IVSrc which is used below.) |
| 3214 |
// |
3214 |
// |
| 3215 |
// Note that if Head.IncExpr is wider than IVSrc, then this phi is too |
3215 |
// Note that if Head.IncExpr is wider than IVSrc, then this phi is too |
| 3216 |
// narrow for the chain, so we can no longer use it. We do allow using a |
3216 |
// narrow for the chain, so we can no longer use it. We do allow using a |
| 3217 |
// wider phi, assuming the LSR checked for free truncation. In that case we |
3217 |
// wider phi, assuming the LSR checked for free truncation. In that case we |
| 3218 |
// should already have a truncate on this operand such that |
3218 |
// should already have a truncate on this operand such that |
| 3219 |
// getSCEV(IVSrc) == IncExpr. |
3219 |
// getSCEV(IVSrc) == IncExpr. |
| 3220 |
if (SE.getSCEV(*IVOpIter) == Head.IncExpr |
3220 |
if (SE.getSCEV(*IVOpIter) == Head.IncExpr |
| 3221 |
|| SE.getSCEV(IVSrc) == Head.IncExpr) { |
3221 |
|| SE.getSCEV(IVSrc) == Head.IncExpr) { |
| 3222 |
break; |
3222 |
break; |
| 3223 |
} |
3223 |
} |
| 3224 |
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); |
3224 |
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); |
| 3225 |
} |
3225 |
} |
| 3226 |
if (IVOpIter == IVOpEnd) { |
3226 |
if (IVOpIter == IVOpEnd) { |
| 3227 |
// Gracefully give up on this chain. |
3227 |
// Gracefully give up on this chain. |
| 3228 |
LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n"); |
3228 |
LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n"); |
| 3229 |
return; |
3229 |
return; |
| 3230 |
} |
3230 |
} |
| 3231 |
assert(IVSrc && "Failed to find IV chain source"); |
3231 |
assert(IVSrc && "Failed to find IV chain source"); |
| 3232 |
|
3232 |
|
| 3233 |
LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); |
3233 |
LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); |
| 3234 |
Type *IVTy = IVSrc->getType(); |
3234 |
Type *IVTy = IVSrc->getType(); |
| 3235 |
Type *IntTy = SE.getEffectiveSCEVType(IVTy); |
3235 |
Type *IntTy = SE.getEffectiveSCEVType(IVTy); |
| 3236 |
const SCEV *LeftOverExpr = nullptr; |
3236 |
const SCEV *LeftOverExpr = nullptr; |
| 3237 |
for (const IVInc &Inc : Chain) { |
3237 |
for (const IVInc &Inc : Chain) { |
| 3238 |
Instruction *InsertPt = Inc.UserInst; |
3238 |
Instruction *InsertPt = Inc.UserInst; |
| 3239 |
if (isa(InsertPt)) |
3239 |
if (isa(InsertPt)) |
| 3240 |
InsertPt = L->getLoopLatch()->getTerminator(); |
3240 |
InsertPt = L->getLoopLatch()->getTerminator(); |
| 3241 |
|
3241 |
|
| 3242 |
// IVOper will replace the current IV User's operand. IVSrc is the IV |
3242 |
// IVOper will replace the current IV User's operand. IVSrc is the IV |
| 3243 |
// value currently held in a register. |
3243 |
// value currently held in a register. |
| 3244 |
Value *IVOper = IVSrc; |
3244 |
Value *IVOper = IVSrc; |
| 3245 |
if (!Inc.IncExpr->isZero()) { |
3245 |
if (!Inc.IncExpr->isZero()) { |
| 3246 |
// IncExpr was the result of subtraction of two narrow values, so must |
3246 |
// IncExpr was the result of subtraction of two narrow values, so must |
| 3247 |
// be signed. |
3247 |
// be signed. |
| 3248 |
const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy); |
3248 |
const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy); |
| 3249 |
LeftOverExpr = LeftOverExpr ? |
3249 |
LeftOverExpr = LeftOverExpr ? |
| 3250 |
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr; |
3250 |
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr; |
| 3251 |
} |
3251 |
} |
| 3252 |
if (LeftOverExpr && !LeftOverExpr->isZero()) { |
3252 |
if (LeftOverExpr && !LeftOverExpr->isZero()) { |
| 3253 |
// Expand the IV increment. |
3253 |
// Expand the IV increment. |
| 3254 |
Rewriter.clearPostInc(); |
3254 |
Rewriter.clearPostInc(); |
| 3255 |
Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt); |
3255 |
Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt); |
| 3256 |
const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc), |
3256 |
const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc), |
| 3257 |
SE.getUnknown(IncV)); |
3257 |
SE.getUnknown(IncV)); |
| 3258 |
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); |
3258 |
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); |
| 3259 |
|
3259 |
|
| 3260 |
// If an IV increment can't be folded, use it as the next IV value. |
3260 |
// If an IV increment can't be folded, use it as the next IV value. |
| 3261 |
if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) { |
3261 |
if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) { |
| 3262 |
assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); |
3262 |
assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); |
| 3263 |
IVSrc = IVOper; |
3263 |
IVSrc = IVOper; |
| 3264 |
LeftOverExpr = nullptr; |
3264 |
LeftOverExpr = nullptr; |
| 3265 |
} |
3265 |
} |
| 3266 |
} |
3266 |
} |
| 3267 |
Type *OperTy = Inc.IVOperand->getType(); |
3267 |
Type *OperTy = Inc.IVOperand->getType(); |
| 3268 |
if (IVTy != OperTy) { |
3268 |
if (IVTy != OperTy) { |
| 3269 |
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) && |
3269 |
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) && |
| 3270 |
"cannot extend a chained IV"); |
3270 |
"cannot extend a chained IV"); |
| 3271 |
IRBuilder<> Builder(InsertPt); |
3271 |
IRBuilder<> Builder(InsertPt); |
| 3272 |
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); |
3272 |
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); |
| 3273 |
} |
3273 |
} |
| 3274 |
Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper); |
3274 |
Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper); |
| 3275 |
if (auto *OperandIsInstr = dyn_cast(Inc.IVOperand)) |
3275 |
if (auto *OperandIsInstr = dyn_cast(Inc.IVOperand)) |
| 3276 |
DeadInsts.emplace_back(OperandIsInstr); |
3276 |
DeadInsts.emplace_back(OperandIsInstr); |
| 3277 |
} |
3277 |
} |
| 3278 |
// If LSR created a new, wider phi, we may also replace its postinc. We only |
3278 |
// If LSR created a new, wider phi, we may also replace its postinc. We only |
| 3279 |
// do this if we also found a wide value for the head of the chain. |
3279 |
// do this if we also found a wide value for the head of the chain. |
| 3280 |
if (isa(Chain.tailUserInst())) { |
3280 |
if (isa(Chain.tailUserInst())) { |
| 3281 |
for (PHINode &Phi : L->getHeader()->phis()) { |
3281 |
for (PHINode &Phi : L->getHeader()->phis()) { |
| 3282 |
if (!isCompatibleIVType(&Phi, IVSrc)) |
3282 |
if (!isCompatibleIVType(&Phi, IVSrc)) |
| 3283 |
continue; |
3283 |
continue; |
| 3284 |
Instruction *PostIncV = dyn_cast( |
3284 |
Instruction *PostIncV = dyn_cast( |
| 3285 |
Phi.getIncomingValueForBlock(L->getLoopLatch())); |
3285 |
Phi.getIncomingValueForBlock(L->getLoopLatch())); |
| 3286 |
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc))) |
3286 |
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc))) |
| 3287 |
continue; |
3287 |
continue; |
| 3288 |
Value *IVOper = IVSrc; |
3288 |
Value *IVOper = IVSrc; |
| 3289 |
Type *PostIncTy = PostIncV->getType(); |
3289 |
Type *PostIncTy = PostIncV->getType(); |
| 3290 |
if (IVTy != PostIncTy) { |
3290 |
if (IVTy != PostIncTy) { |
| 3291 |
assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types"); |
3291 |
assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types"); |
| 3292 |
IRBuilder<> Builder(L->getLoopLatch()->getTerminator()); |
3292 |
IRBuilder<> Builder(L->getLoopLatch()->getTerminator()); |
| 3293 |
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc()); |
3293 |
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc()); |
| 3294 |
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); |
3294 |
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); |
| 3295 |
} |
3295 |
} |
| 3296 |
Phi.replaceUsesOfWith(PostIncV, IVOper); |
3296 |
Phi.replaceUsesOfWith(PostIncV, IVOper); |
| 3297 |
DeadInsts.emplace_back(PostIncV); |
3297 |
DeadInsts.emplace_back(PostIncV); |
| 3298 |
} |
3298 |
} |
| 3299 |
} |
3299 |
} |
| 3300 |
} |
3300 |
} |
| 3301 |
|
3301 |
|
| 3302 |
void LSRInstance::CollectFixupsAndInitialFormulae() { |
3302 |
void LSRInstance::CollectFixupsAndInitialFormulae() { |
| 3303 |
BranchInst *ExitBranch = nullptr; |
3303 |
BranchInst *ExitBranch = nullptr; |
| 3304 |
bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI); |
3304 |
bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI); |
| 3305 |
|
3305 |
|
| 3306 |
// For calculating baseline cost |
3306 |
// For calculating baseline cost |
| 3307 |
SmallPtrSet Regs; |
3307 |
SmallPtrSet Regs; |
| 3308 |
DenseSet VisitedRegs; |
3308 |
DenseSet VisitedRegs; |
| 3309 |
DenseSet VisitedLSRUse; |
3309 |
DenseSet VisitedLSRUse; |
| 3310 |
|
3310 |
|
| 3311 |
for (const IVStrideUse &U : IU) { |
3311 |
for (const IVStrideUse &U : IU) { |
| 3312 |
Instruction *UserInst = U.getUser(); |
3312 |
Instruction *UserInst = U.getUser(); |
| 3313 |
// Skip IV users that are part of profitable IV Chains. |
3313 |
// Skip IV users that are part of profitable IV Chains. |
| 3314 |
User::op_iterator UseI = |
3314 |
User::op_iterator UseI = |
| 3315 |
find(UserInst->operands(), U.getOperandValToReplace()); |
3315 |
find(UserInst->operands(), U.getOperandValToReplace()); |
| 3316 |
assert(UseI != UserInst->op_end() && "cannot find IV operand"); |
3316 |
assert(UseI != UserInst->op_end() && "cannot find IV operand"); |
| 3317 |
if (IVIncSet.count(UseI)) { |
3317 |
if (IVIncSet.count(UseI)) { |
| 3318 |
LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n'); |
3318 |
LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n'); |
| 3319 |
continue; |
3319 |
continue; |
| 3320 |
} |
3320 |
} |
| 3321 |
|
3321 |
|
| 3322 |
LSRUse::KindType Kind = LSRUse::Basic; |
3322 |
LSRUse::KindType Kind = LSRUse::Basic; |
| 3323 |
MemAccessTy AccessTy; |
3323 |
MemAccessTy AccessTy; |
| 3324 |
if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) { |
3324 |
if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) { |
| 3325 |
Kind = LSRUse::Address; |
3325 |
Kind = LSRUse::Address; |
| 3326 |
AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace()); |
3326 |
AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace()); |
| 3327 |
} |
3327 |
} |
| 3328 |
|
3328 |
|
| 3329 |
const SCEV *S = IU.getExpr(U); |
3329 |
const SCEV *S = IU.getExpr(U); |
| 3330 |
if (!S) |
3330 |
if (!S) |
| 3331 |
continue; |
3331 |
continue; |
| 3332 |
PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops(); |
3332 |
PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops(); |
| 3333 |
|
3333 |
|
| 3334 |
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as |
3334 |
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as |
| 3335 |
// (N - i == 0), and this allows (N - i) to be the expression that we work |
3335 |
// (N - i == 0), and this allows (N - i) to be the expression that we work |
| 3336 |
// with rather than just N or i, so we can consider the register |
3336 |
// with rather than just N or i, so we can consider the register |
| 3337 |
// requirements for both N and i at the same time. Limiting this code to |
3337 |
// requirements for both N and i at the same time. Limiting this code to |
| 3338 |
// equality icmps is not a problem because all interesting loops use |
3338 |
// equality icmps is not a problem because all interesting loops use |
| 3339 |
// equality icmps, thanks to IndVarSimplify. |
3339 |
// equality icmps, thanks to IndVarSimplify. |
| 3340 |
if (ICmpInst *CI = dyn_cast(UserInst)) { |
3340 |
if (ICmpInst *CI = dyn_cast(UserInst)) { |
| 3341 |
// If CI can be saved in some target, like replaced inside hardware loop |
3341 |
// If CI can be saved in some target, like replaced inside hardware loop |
| 3342 |
// in PowerPC, no need to generate initial formulae for it. |
3342 |
// in PowerPC, no need to generate initial formulae for it. |
| 3343 |
if (SaveCmp && CI == dyn_cast(ExitBranch->getCondition())) |
3343 |
if (SaveCmp && CI == dyn_cast(ExitBranch->getCondition())) |
| 3344 |
continue; |
3344 |
continue; |
| 3345 |
if (CI->isEquality()) { |
3345 |
if (CI->isEquality()) { |
| 3346 |
// Swap the operands if needed to put the OperandValToReplace on the |
3346 |
// Swap the operands if needed to put the OperandValToReplace on the |
| 3347 |
// left, for consistency. |
3347 |
// left, for consistency. |
| 3348 |
Value *NV = CI->getOperand(1); |
3348 |
Value *NV = CI->getOperand(1); |
| 3349 |
if (NV == U.getOperandValToReplace()) { |
3349 |
if (NV == U.getOperandValToReplace()) { |
| 3350 |
CI->setOperand(1, CI->getOperand(0)); |
3350 |
CI->setOperand(1, CI->getOperand(0)); |
| 3351 |
CI->setOperand(0, NV); |
3351 |
CI->setOperand(0, NV); |
| 3352 |
NV = CI->getOperand(1); |
3352 |
NV = CI->getOperand(1); |
| 3353 |
Changed = true; |
3353 |
Changed = true; |
| 3354 |
} |
3354 |
} |
| 3355 |
|
3355 |
|
| 3356 |
// x == y --> x - y == 0 |
3356 |
// x == y --> x - y == 0 |
| 3357 |
const SCEV *N = SE.getSCEV(NV); |
3357 |
const SCEV *N = SE.getSCEV(NV); |
| 3358 |
if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) && |
3358 |
if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) && |
| 3359 |
(!NV->getType()->isPointerTy() || |
3359 |
(!NV->getType()->isPointerTy() || |
| 3360 |
SE.getPointerBase(N) == SE.getPointerBase(S))) { |
3360 |
SE.getPointerBase(N) == SE.getPointerBase(S))) { |
| 3361 |
// S is normalized, so normalize N before folding it into S |
3361 |
// S is normalized, so normalize N before folding it into S |
| 3362 |
// to keep the result normalized. |
3362 |
// to keep the result normalized. |
| 3363 |
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); |
3363 |
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); |
| 3364 |
if (!N) |
3364 |
if (!N) |
| 3365 |
continue; |
3365 |
continue; |
| 3366 |
Kind = LSRUse::ICmpZero; |
3366 |
Kind = LSRUse::ICmpZero; |
| 3367 |
S = SE.getMinusSCEV(N, S); |
3367 |
S = SE.getMinusSCEV(N, S); |
| 3368 |
} else if (L->isLoopInvariant(NV) && |
3368 |
} else if (L->isLoopInvariant(NV) && |
| 3369 |
(!isa(NV) || |
3369 |
(!isa(NV) || |
| 3370 |
DT.dominates(cast(NV), L->getHeader())) && |
3370 |
DT.dominates(cast(NV), L->getHeader())) && |
| 3371 |
!NV->getType()->isPointerTy()) { |
3371 |
!NV->getType()->isPointerTy()) { |
| 3372 |
// If we can't generally expand the expression (e.g. it contains |
3372 |
// If we can't generally expand the expression (e.g. it contains |
| 3373 |
// a divide), but it is already at a loop invariant point before the |
3373 |
// a divide), but it is already at a loop invariant point before the |
| 3374 |
// loop, wrap it in an unknown (to prevent the expander from trying |
3374 |
// loop, wrap it in an unknown (to prevent the expander from trying |
| 3375 |
// to re-expand in a potentially unsafe way.) The restriction to |
3375 |
// to re-expand in a potentially unsafe way.) The restriction to |
| 3376 |
// integer types is required because the unknown hides the base, and |
3376 |
// integer types is required because the unknown hides the base, and |
| 3377 |
// SCEV can't compute the difference of two unknown pointers. |
3377 |
// SCEV can't compute the difference of two unknown pointers. |
| 3378 |
N = SE.getUnknown(NV); |
3378 |
N = SE.getUnknown(NV); |
| 3379 |
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); |
3379 |
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); |
| 3380 |
if (!N) |
3380 |
if (!N) |
| 3381 |
continue; |
3381 |
continue; |
| 3382 |
Kind = LSRUse::ICmpZero; |
3382 |
Kind = LSRUse::ICmpZero; |
| 3383 |
S = SE.getMinusSCEV(N, S); |
3383 |
S = SE.getMinusSCEV(N, S); |
| 3384 |
assert(!isa(S)); |
3384 |
assert(!isa(S)); |
| 3385 |
} |
3385 |
} |
| 3386 |
|
3386 |
|
| 3387 |
// -1 and the negations of all interesting strides (except the negation |
3387 |
// -1 and the negations of all interesting strides (except the negation |
| 3388 |
// of -1) are now also interesting. |
3388 |
// of -1) are now also interesting. |
| 3389 |
for (size_t i = 0, e = Factors.size(); i != e; ++i) |
3389 |
for (size_t i = 0, e = Factors.size(); i != e; ++i) |
| 3390 |
if (Factors[i] != -1) |
3390 |
if (Factors[i] != -1) |
| 3391 |
Factors.insert(-(uint64_t)Factors[i]); |
3391 |
Factors.insert(-(uint64_t)Factors[i]); |
| 3392 |
Factors.insert(-1); |
3392 |
Factors.insert(-1); |
| 3393 |
} |
3393 |
} |
| 3394 |
} |
3394 |
} |
| 3395 |
|
3395 |
|
| 3396 |
// Get or create an LSRUse. |
3396 |
// Get or create an LSRUse. |
| 3397 |
std::pair P = getUse(S, Kind, AccessTy); |
3397 |
std::pair P = getUse(S, Kind, AccessTy); |
| 3398 |
size_t LUIdx = P.first; |
3398 |
size_t LUIdx = P.first; |
| 3399 |
int64_t Offset = P.second; |
3399 |
int64_t Offset = P.second; |
| 3400 |
LSRUse &LU = Uses[LUIdx]; |
3400 |
LSRUse &LU = Uses[LUIdx]; |
| 3401 |
|
3401 |
|
| 3402 |
// Record the fixup. |
3402 |
// Record the fixup. |
| 3403 |
LSRFixup &LF = LU.getNewFixup(); |
3403 |
LSRFixup &LF = LU.getNewFixup(); |
| 3404 |
LF.UserInst = UserInst; |
3404 |
LF.UserInst = UserInst; |
| 3405 |
LF.OperandValToReplace = U.getOperandValToReplace(); |
3405 |
LF.OperandValToReplace = U.getOperandValToReplace(); |
| 3406 |
LF.PostIncLoops = TmpPostIncLoops; |
3406 |
LF.PostIncLoops = TmpPostIncLoops; |
| 3407 |
LF.Offset = Offset; |
3407 |
LF.Offset = Offset; |
| 3408 |
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); |
3408 |
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); |
| 3409 |
|
3409 |
|
| 3410 |
// Create SCEV as Formula for calculating baseline cost |
3410 |
// Create SCEV as Formula for calculating baseline cost |
| 3411 |
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) { |
3411 |
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) { |
| 3412 |
Formula F; |
3412 |
Formula F; |
| 3413 |
F.initialMatch(S, L, SE); |
3413 |
F.initialMatch(S, L, SE); |
| 3414 |
BaselineCost.RateFormula(F, Regs, VisitedRegs, LU); |
3414 |
BaselineCost.RateFormula(F, Regs, VisitedRegs, LU); |
| 3415 |
VisitedLSRUse.insert(LUIdx); |
3415 |
VisitedLSRUse.insert(LUIdx); |
| 3416 |
} |
3416 |
} |
| 3417 |
|
3417 |
|
| 3418 |
if (!LU.WidestFixupType || |
3418 |
if (!LU.WidestFixupType || |
| 3419 |
SE.getTypeSizeInBits(LU.WidestFixupType) < |
3419 |
SE.getTypeSizeInBits(LU.WidestFixupType) < |
| 3420 |
SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) |
3420 |
SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) |
| 3421 |
LU.WidestFixupType = LF.OperandValToReplace->getType(); |
3421 |
LU.WidestFixupType = LF.OperandValToReplace->getType(); |
| 3422 |
|
3422 |
|
| 3423 |
// If this is the first use of this LSRUse, give it a formula. |
3423 |
// If this is the first use of this LSRUse, give it a formula. |
| 3424 |
if (LU.Formulae.empty()) { |
3424 |
if (LU.Formulae.empty()) { |
| 3425 |
InsertInitialFormula(S, LU, LUIdx); |
3425 |
InsertInitialFormula(S, LU, LUIdx); |
| 3426 |
CountRegisters(LU.Formulae.back(), LUIdx); |
3426 |
CountRegisters(LU.Formulae.back(), LUIdx); |
| 3427 |
} |
3427 |
} |
| 3428 |
} |
3428 |
} |
| 3429 |
|
3429 |
|
| 3430 |
LLVM_DEBUG(print_fixups(dbgs())); |
3430 |
LLVM_DEBUG(print_fixups(dbgs())); |
| 3431 |
} |
3431 |
} |
| 3432 |
|
3432 |
|
| 3433 |
/// Insert a formula for the given expression into the given use, separating out |
3433 |
/// Insert a formula for the given expression into the given use, separating out |
| 3434 |
/// loop-variant portions from loop-invariant and loop-computable portions. |
3434 |
/// loop-variant portions from loop-invariant and loop-computable portions. |
| 3435 |
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, |
3435 |
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, |
| 3436 |
size_t LUIdx) { |
3436 |
size_t LUIdx) { |
| 3437 |
// Mark uses whose expressions cannot be expanded. |
3437 |
// Mark uses whose expressions cannot be expanded. |
| 3438 |
if (!Rewriter.isSafeToExpand(S)) |
3438 |
if (!Rewriter.isSafeToExpand(S)) |
| 3439 |
LU.RigidFormula = true; |
3439 |
LU.RigidFormula = true; |
| 3440 |
|
3440 |
|
| 3441 |
Formula F; |
3441 |
Formula F; |
| 3442 |
F.initialMatch(S, L, SE); |
3442 |
F.initialMatch(S, L, SE); |
| 3443 |
bool Inserted = InsertFormula(LU, LUIdx, F); |
3443 |
bool Inserted = InsertFormula(LU, LUIdx, F); |
| 3444 |
assert(Inserted && "Initial formula already exists!"); (void)Inserted; |
3444 |
assert(Inserted && "Initial formula already exists!"); (void)Inserted; |
| 3445 |
} |
3445 |
} |
| 3446 |
|
3446 |
|
| 3447 |
/// Insert a simple single-register formula for the given expression into the |
3447 |
/// Insert a simple single-register formula for the given expression into the |
| 3448 |
/// given use. |
3448 |
/// given use. |
| 3449 |
void |
3449 |
void |
| 3450 |
LSRInstance::InsertSupplementalFormula(const SCEV *S, |
3450 |
LSRInstance::InsertSupplementalFormula(const SCEV *S, |
| 3451 |
LSRUse &LU, size_t LUIdx) { |
3451 |
LSRUse &LU, size_t LUIdx) { |
| 3452 |
Formula F; |
3452 |
Formula F; |
| 3453 |
F.BaseRegs.push_back(S); |
3453 |
F.BaseRegs.push_back(S); |
| 3454 |
F.HasBaseReg = true; |
3454 |
F.HasBaseReg = true; |
| 3455 |
bool Inserted = InsertFormula(LU, LUIdx, F); |
3455 |
bool Inserted = InsertFormula(LU, LUIdx, F); |
| 3456 |
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; |
3456 |
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; |
| 3457 |
} |
3457 |
} |
| 3458 |
|
3458 |
|
| 3459 |
/// Note which registers are used by the given formula, updating RegUses. |
3459 |
/// Note which registers are used by the given formula, updating RegUses. |
| 3460 |
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { |
3460 |
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { |
| 3461 |
if (F.ScaledReg) |
3461 |
if (F.ScaledReg) |
| 3462 |
RegUses.countRegister(F.ScaledReg, LUIdx); |
3462 |
RegUses.countRegister(F.ScaledReg, LUIdx); |
| 3463 |
for (const SCEV *BaseReg : F.BaseRegs) |
3463 |
for (const SCEV *BaseReg : F.BaseRegs) |
| 3464 |
RegUses.countRegister(BaseReg, LUIdx); |
3464 |
RegUses.countRegister(BaseReg, LUIdx); |
| 3465 |
} |
3465 |
} |
| 3466 |
|
3466 |
|
| 3467 |
/// If the given formula has not yet been inserted, add it to the list, and |
3467 |
/// If the given formula has not yet been inserted, add it to the list, and |
| 3468 |
/// return true. Return false otherwise. |
3468 |
/// return true. Return false otherwise. |
| 3469 |
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { |
3469 |
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { |
| 3470 |
// Do not insert formula that we will not be able to expand. |
3470 |
// Do not insert formula that we will not be able to expand. |
| 3471 |
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && |
3471 |
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && |
| 3472 |
"Formula is illegal"); |
3472 |
"Formula is illegal"); |
| 3473 |
|
3473 |
|
| 3474 |
if (!LU.InsertFormula(F, *L)) |
3474 |
if (!LU.InsertFormula(F, *L)) |
| 3475 |
return false; |
3475 |
return false; |
| 3476 |
|
3476 |
|
| 3477 |
CountRegisters(F, LUIdx); |
3477 |
CountRegisters(F, LUIdx); |
| 3478 |
return true; |
3478 |
return true; |
| 3479 |
} |
3479 |
} |
| 3480 |
|
3480 |
|
| 3481 |
/// Check for other uses of loop-invariant values which we're tracking. These |
3481 |
/// Check for other uses of loop-invariant values which we're tracking. These |
| 3482 |
/// other uses will pin these values in registers, making them less profitable |
3482 |
/// other uses will pin these values in registers, making them less profitable |
| 3483 |
/// for elimination. |
3483 |
/// for elimination. |
| 3484 |
/// TODO: This currently misses non-constant addrec step registers. |
3484 |
/// TODO: This currently misses non-constant addrec step registers. |
| 3485 |
/// TODO: Should this give more weight to users inside the loop? |
3485 |
/// TODO: Should this give more weight to users inside the loop? |
| 3486 |
void |
3486 |
void |
| 3487 |
LSRInstance::CollectLoopInvariantFixupsAndFormulae() { |
3487 |
LSRInstance::CollectLoopInvariantFixupsAndFormulae() { |
| 3488 |
SmallVector Worklist(RegUses.begin(), RegUses.end()); |
3488 |
SmallVector Worklist(RegUses.begin(), RegUses.end()); |
| 3489 |
SmallPtrSet Visited; |
3489 |
SmallPtrSet Visited; |
| 3490 |
|
3490 |
|
| 3491 |
while (!Worklist.empty()) { |
3491 |
while (!Worklist.empty()) { |
| 3492 |
const SCEV *S = Worklist.pop_back_val(); |
3492 |
const SCEV *S = Worklist.pop_back_val(); |
| 3493 |
|
3493 |
|
| 3494 |
// Don't process the same SCEV twice |
3494 |
// Don't process the same SCEV twice |
| 3495 |
if (!Visited.insert(S).second) |
3495 |
if (!Visited.insert(S).second) |
| 3496 |
continue; |
3496 |
continue; |
| 3497 |
|
3497 |
|
| 3498 |
if (const SCEVNAryExpr *N = dyn_cast(S)) |
3498 |
if (const SCEVNAryExpr *N = dyn_cast(S)) |
| 3499 |
append_range(Worklist, N->operands()); |
3499 |
append_range(Worklist, N->operands()); |
| 3500 |
else if (const SCEVIntegralCastExpr *C = dyn_cast(S)) |
3500 |
else if (const SCEVIntegralCastExpr *C = dyn_cast(S)) |
| 3501 |
Worklist.push_back(C->getOperand()); |
3501 |
Worklist.push_back(C->getOperand()); |
| 3502 |
else if (const SCEVUDivExpr *D = dyn_cast(S)) { |
3502 |
else if (const SCEVUDivExpr *D = dyn_cast(S)) { |
| 3503 |
Worklist.push_back(D->getLHS()); |
3503 |
Worklist.push_back(D->getLHS()); |
| 3504 |
Worklist.push_back(D->getRHS()); |
3504 |
Worklist.push_back(D->getRHS()); |
| 3505 |
} else if (const SCEVUnknown *US = dyn_cast(S)) { |
3505 |
} else if (const SCEVUnknown *US = dyn_cast(S)) { |
| 3506 |
const Value *V = US->getValue(); |
3506 |
const Value *V = US->getValue(); |
| 3507 |
if (const Instruction *Inst = dyn_cast(V)) { |
3507 |
if (const Instruction *Inst = dyn_cast(V)) { |
| 3508 |
// Look for instructions defined outside the loop. |
3508 |
// Look for instructions defined outside the loop. |
| 3509 |
if (L->contains(Inst)) continue; |
3509 |
if (L->contains(Inst)) continue; |
| 3510 |
} else if (isa(V)) |
3510 |
} else if (isa(V)) |
| 3511 |
// Constants can be re-materialized. |
3511 |
// Constants can be re-materialized. |
| 3512 |
continue; |
3512 |
continue; |
| 3513 |
for (const Use &U : V->uses()) { |
3513 |
for (const Use &U : V->uses()) { |
| 3514 |
const Instruction *UserInst = dyn_cast(U.getUser()); |
3514 |
const Instruction *UserInst = dyn_cast(U.getUser()); |
| 3515 |
// Ignore non-instructions. |
3515 |
// Ignore non-instructions. |
| 3516 |
if (!UserInst) |
3516 |
if (!UserInst) |
| 3517 |
continue; |
3517 |
continue; |
| 3518 |
// Don't bother if the instruction is an EHPad. |
3518 |
// Don't bother if the instruction is an EHPad. |
| 3519 |
if (UserInst->isEHPad()) |
3519 |
if (UserInst->isEHPad()) |
| 3520 |
continue; |
3520 |
continue; |
| 3521 |
// Ignore instructions in other functions (as can happen with |
3521 |
// Ignore instructions in other functions (as can happen with |
| 3522 |
// Constants). |
3522 |
// Constants). |
| 3523 |
if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) |
3523 |
if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) |
| 3524 |
continue; |
3524 |
continue; |
| 3525 |
// Ignore instructions not dominated by the loop. |
3525 |
// Ignore instructions not dominated by the loop. |
| 3526 |
const BasicBlock *UseBB = !isa(UserInst) ? |
3526 |
const BasicBlock *UseBB = !isa(UserInst) ? |
| 3527 |
UserInst->getParent() : |
3527 |
UserInst->getParent() : |
| 3528 |
cast(UserInst)->getIncomingBlock( |
3528 |
cast(UserInst)->getIncomingBlock( |
| 3529 |
PHINode::getIncomingValueNumForOperand(U.getOperandNo())); |
3529 |
PHINode::getIncomingValueNumForOperand(U.getOperandNo())); |
| 3530 |
if (!DT.dominates(L->getHeader(), UseBB)) |
3530 |
if (!DT.dominates(L->getHeader(), UseBB)) |
| 3531 |
continue; |
3531 |
continue; |
| 3532 |
// Don't bother if the instruction is in a BB which ends in an EHPad. |
3532 |
// Don't bother if the instruction is in a BB which ends in an EHPad. |
| 3533 |
if (UseBB->getTerminator()->isEHPad()) |
3533 |
if (UseBB->getTerminator()->isEHPad()) |
| 3534 |
continue; |
3534 |
continue; |
| 3535 |
|
3535 |
|
| 3536 |
// Ignore cases in which the currently-examined value could come from |
3536 |
// Ignore cases in which the currently-examined value could come from |
| 3537 |
// a basic block terminated with an EHPad. This checks all incoming |
3537 |
// a basic block terminated with an EHPad. This checks all incoming |
| 3538 |
// blocks of the phi node since it is possible that the same incoming |
3538 |
// blocks of the phi node since it is possible that the same incoming |
| 3539 |
// value comes from multiple basic blocks, only some of which may end |
3539 |
// value comes from multiple basic blocks, only some of which may end |
| 3540 |
// in an EHPad. If any of them do, a subsequent rewrite attempt by this |
3540 |
// in an EHPad. If any of them do, a subsequent rewrite attempt by this |
| 3541 |
// pass would try to insert instructions into an EHPad, hitting an |
3541 |
// pass would try to insert instructions into an EHPad, hitting an |
| 3542 |
// assertion. |
3542 |
// assertion. |
| 3543 |
if (isa(UserInst)) { |
3543 |
if (isa(UserInst)) { |
| 3544 |
const auto *PhiNode = cast(UserInst); |
3544 |
const auto *PhiNode = cast(UserInst); |
| 3545 |
bool HasIncompatibleEHPTerminatedBlock = false; |
3545 |
bool HasIncompatibleEHPTerminatedBlock = false; |
| 3546 |
llvm::Value *ExpectedValue = U; |
3546 |
llvm::Value *ExpectedValue = U; |
| 3547 |
for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) { |
3547 |
for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) { |
| 3548 |
if (PhiNode->getIncomingValue(I) == ExpectedValue) { |
3548 |
if (PhiNode->getIncomingValue(I) == ExpectedValue) { |
| 3549 |
if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) { |
3549 |
if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) { |
| 3550 |
HasIncompatibleEHPTerminatedBlock = true; |
3550 |
HasIncompatibleEHPTerminatedBlock = true; |
| 3551 |
break; |
3551 |
break; |
| 3552 |
} |
3552 |
} |
| 3553 |
} |
3553 |
} |
| 3554 |
} |
3554 |
} |
| 3555 |
if (HasIncompatibleEHPTerminatedBlock) { |
3555 |
if (HasIncompatibleEHPTerminatedBlock) { |
| 3556 |
continue; |
3556 |
continue; |
| 3557 |
} |
3557 |
} |
| 3558 |
} |
3558 |
} |
| 3559 |
|
3559 |
|
| 3560 |
// Don't bother rewriting PHIs in catchswitch blocks. |
3560 |
// Don't bother rewriting PHIs in catchswitch blocks. |
| 3561 |
if (isa(UserInst->getParent()->getTerminator())) |
3561 |
if (isa(UserInst->getParent()->getTerminator())) |
| 3562 |
continue; |
3562 |
continue; |
| 3563 |
// Ignore uses which are part of other SCEV expressions, to avoid |
3563 |
// Ignore uses which are part of other SCEV expressions, to avoid |
| 3564 |
// analyzing them multiple times. |
3564 |
// analyzing them multiple times. |
| 3565 |
if (SE.isSCEVable(UserInst->getType())) { |
3565 |
if (SE.isSCEVable(UserInst->getType())) { |
| 3566 |
const SCEV *UserS = SE.getSCEV(const_cast(UserInst)); |
3566 |
const SCEV *UserS = SE.getSCEV(const_cast(UserInst)); |
| 3567 |
// If the user is a no-op, look through to its uses. |
3567 |
// If the user is a no-op, look through to its uses. |
| 3568 |
if (!isa(UserS)) |
3568 |
if (!isa(UserS)) |
| 3569 |
continue; |
3569 |
continue; |
| 3570 |
if (UserS == US) { |
3570 |
if (UserS == US) { |
| 3571 |
Worklist.push_back( |
3571 |
Worklist.push_back( |
| 3572 |
SE.getUnknown(const_cast(UserInst))); |
3572 |
SE.getUnknown(const_cast(UserInst))); |
| 3573 |
continue; |
3573 |
continue; |
| 3574 |
} |
3574 |
} |
| 3575 |
} |
3575 |
} |
| 3576 |
// Ignore icmp instructions which are already being analyzed. |
3576 |
// Ignore icmp instructions which are already being analyzed. |
| 3577 |
if (const ICmpInst *ICI = dyn_cast(UserInst)) { |
3577 |
if (const ICmpInst *ICI = dyn_cast(UserInst)) { |
| 3578 |
unsigned OtherIdx = !U.getOperandNo(); |
3578 |
unsigned OtherIdx = !U.getOperandNo(); |
| 3579 |
Value *OtherOp = const_cast(ICI->getOperand(OtherIdx)); |
3579 |
Value *OtherOp = const_cast(ICI->getOperand(OtherIdx)); |
| 3580 |
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L)) |
3580 |
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L)) |
| 3581 |
continue; |
3581 |
continue; |
| 3582 |
} |
3582 |
} |
| 3583 |
|
3583 |
|
| 3584 |
std::pair P = getUse( |
3584 |
std::pair P = getUse( |
| 3585 |
S, LSRUse::Basic, MemAccessTy()); |
3585 |
S, LSRUse::Basic, MemAccessTy()); |
| 3586 |
size_t LUIdx = P.first; |
3586 |
size_t LUIdx = P.first; |
| 3587 |
int64_t Offset = P.second; |
3587 |
int64_t Offset = P.second; |
| 3588 |
LSRUse &LU = Uses[LUIdx]; |
3588 |
LSRUse &LU = Uses[LUIdx]; |
| 3589 |
LSRFixup &LF = LU.getNewFixup(); |
3589 |
LSRFixup &LF = LU.getNewFixup(); |
| 3590 |
LF.UserInst = const_cast(UserInst); |
3590 |
LF.UserInst = const_cast(UserInst); |
| 3591 |
LF.OperandValToReplace = U; |
3591 |
LF.OperandValToReplace = U; |
| 3592 |
LF.Offset = Offset; |
3592 |
LF.Offset = Offset; |
| 3593 |
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); |
3593 |
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); |
| 3594 |
if (!LU.WidestFixupType || |
3594 |
if (!LU.WidestFixupType || |
| 3595 |
SE.getTypeSizeInBits(LU.WidestFixupType) < |
3595 |
SE.getTypeSizeInBits(LU.WidestFixupType) < |
| 3596 |
SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) |
3596 |
SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) |
| 3597 |
LU.WidestFixupType = LF.OperandValToReplace->getType(); |
3597 |
LU.WidestFixupType = LF.OperandValToReplace->getType(); |
| 3598 |
InsertSupplementalFormula(US, LU, LUIdx); |
3598 |
InsertSupplementalFormula(US, LU, LUIdx); |
| 3599 |
CountRegisters(LU.Formulae.back(), Uses.size() - 1); |
3599 |
CountRegisters(LU.Formulae.back(), Uses.size() - 1); |
| 3600 |
break; |
3600 |
break; |
| 3601 |
} |
3601 |
} |
| 3602 |
} |
3602 |
} |
| 3603 |
} |
3603 |
} |
| 3604 |
} |
3604 |
} |
| 3605 |
|
3605 |
|
| 3606 |
/// Split S into subexpressions which can be pulled out into separate |
3606 |
/// Split S into subexpressions which can be pulled out into separate |
| 3607 |
/// registers. If C is non-null, multiply each subexpression by C. |
3607 |
/// registers. If C is non-null, multiply each subexpression by C. |
| 3608 |
/// |
3608 |
/// |
| 3609 |
/// Return remainder expression after factoring the subexpressions captured by |
3609 |
/// Return remainder expression after factoring the subexpressions captured by |
| 3610 |
/// Ops. If Ops is complete, return NULL. |
3610 |
/// Ops. If Ops is complete, return NULL. |
| 3611 |
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, |
3611 |
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, |
| 3612 |
SmallVectorImpl &Ops, |
3612 |
SmallVectorImpl &Ops, |
| 3613 |
const Loop *L, |
3613 |
const Loop *L, |
| 3614 |
ScalarEvolution &SE, |
3614 |
ScalarEvolution &SE, |
| 3615 |
unsigned Depth = 0) { |
3615 |
unsigned Depth = 0) { |
| 3616 |
// Arbitrarily cap recursion to protect compile time. |
3616 |
// Arbitrarily cap recursion to protect compile time. |
| 3617 |
if (Depth >= 3) |
3617 |
if (Depth >= 3) |
| 3618 |
return S; |
3618 |
return S; |
| 3619 |
|
3619 |
|
| 3620 |
if (const SCEVAddExpr *Add = dyn_cast(S)) { |
3620 |
if (const SCEVAddExpr *Add = dyn_cast(S)) { |
| 3621 |
// Break out add operands. |
3621 |
// Break out add operands. |
| 3622 |
for (const SCEV *S : Add->operands()) { |
3622 |
for (const SCEV *S : Add->operands()) { |
| 3623 |
const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1); |
3623 |
const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1); |
| 3624 |
if (Remainder) |
3624 |
if (Remainder) |
| 3625 |
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); |
3625 |
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); |
| 3626 |
} |
3626 |
} |
| 3627 |
return nullptr; |
3627 |
return nullptr; |
| 3628 |
} else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
3628 |
} else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { |
| 3629 |
// Split a non-zero base out of an addrec. |
3629 |
// Split a non-zero base out of an addrec. |
| 3630 |
if (AR->getStart()->isZero() || !AR->isAffine()) |
3630 |
if (AR->getStart()->isZero() || !AR->isAffine()) |
| 3631 |
return S; |
3631 |
return S; |
| 3632 |
|
3632 |
|
| 3633 |
const SCEV *Remainder = CollectSubexprs(AR->getStart(), |
3633 |
const SCEV *Remainder = CollectSubexprs(AR->getStart(), |
| 3634 |
C, Ops, L, SE, Depth+1); |
3634 |
C, Ops, L, SE, Depth+1); |
| 3635 |
// Split the non-zero AddRec unless it is part of a nested recurrence that |
3635 |
// Split the non-zero AddRec unless it is part of a nested recurrence that |
| 3636 |
// does not pertain to this loop. |
3636 |
// does not pertain to this loop. |
| 3637 |
if (Remainder && (AR->getLoop() == L || !isa(Remainder))) { |
3637 |
if (Remainder && (AR->getLoop() == L || !isa(Remainder))) { |
| 3638 |
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); |
3638 |
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); |
| 3639 |
Remainder = nullptr; |
3639 |
Remainder = nullptr; |
| 3640 |
} |
3640 |
} |
| 3641 |
if (Remainder != AR->getStart()) { |
3641 |
if (Remainder != AR->getStart()) { |
| 3642 |
if (!Remainder) |
3642 |
if (!Remainder) |
| 3643 |
Remainder = SE.getConstant(AR->getType(), 0); |
3643 |
Remainder = SE.getConstant(AR->getType(), 0); |
| 3644 |
return SE.getAddRecExpr(Remainder, |
3644 |
return SE.getAddRecExpr(Remainder, |
| 3645 |
AR->getStepRecurrence(SE), |
3645 |
AR->getStepRecurrence(SE), |
| 3646 |
AR->getLoop(), |
3646 |
AR->getLoop(), |
| 3647 |
//FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
3647 |
//FIXME: AR->getNoWrapFlags(SCEV::FlagNW) |
| 3648 |
SCEV::FlagAnyWrap); |
3648 |
SCEV::FlagAnyWrap); |
| 3649 |
} |
3649 |
} |
| 3650 |
} else if (const SCEVMulExpr *Mul = dyn_cast(S)) { |
3650 |
} else if (const SCEVMulExpr *Mul = dyn_cast(S)) { |
| 3651 |
// Break (C * (a + b + c)) into C*a + C*b + C*c. |
3651 |
// Break (C * (a + b + c)) into C*a + C*b + C*c. |
| 3652 |
if (Mul->getNumOperands() != 2) |
3652 |
if (Mul->getNumOperands() != 2) |
| 3653 |
return S; |
3653 |
return S; |
| 3654 |
if (const SCEVConstant *Op0 = |
3654 |
if (const SCEVConstant *Op0 = |
| 3655 |
dyn_cast(Mul->getOperand(0))) { |
3655 |
dyn_cast(Mul->getOperand(0))) { |
| 3656 |
C = C ? cast(SE.getMulExpr(C, Op0)) : Op0; |
3656 |
C = C ? cast(SE.getMulExpr(C, Op0)) : Op0; |
| 3657 |
const SCEV *Remainder = |
3657 |
const SCEV *Remainder = |
| 3658 |
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); |
3658 |
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); |
| 3659 |
if (Remainder) |
3659 |
if (Remainder) |
| 3660 |
Ops.push_back(SE.getMulExpr(C, Remainder)); |
3660 |
Ops.push_back(SE.getMulExpr(C, Remainder)); |
| 3661 |
return nullptr; |
3661 |
return nullptr; |
| 3662 |
} |
3662 |
} |
| 3663 |
} |
3663 |
} |
| 3664 |
return S; |
3664 |
return S; |
| 3665 |
} |
3665 |
} |
| 3666 |
|
3666 |
|
| 3667 |
/// Return true if the SCEV represents a value that may end up as a |
3667 |
/// Return true if the SCEV represents a value that may end up as a |
| 3668 |
/// post-increment operation. |
3668 |
/// post-increment operation. |
| 3669 |
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, |
3669 |
static bool mayUsePostIncMode(const TargetTransformInfo &TTI, |
| 3670 |
LSRUse &LU, const SCEV *S, const Loop *L, |
3670 |
LSRUse &LU, const SCEV *S, const Loop *L, |
| 3671 |
ScalarEvolution &SE) { |
3671 |
ScalarEvolution &SE) { |
| 3672 |
if (LU.Kind != LSRUse::Address || |
3672 |
if (LU.Kind != LSRUse::Address || |
| 3673 |
!LU.AccessTy.getType()->isIntOrIntVectorTy()) |
3673 |
!LU.AccessTy.getType()->isIntOrIntVectorTy()) |
| 3674 |
return false; |
3674 |
return false; |
| 3675 |
const SCEVAddRecExpr *AR = dyn_cast(S); |
3675 |
const SCEVAddRecExpr *AR = dyn_cast(S); |
| 3676 |
if (!AR) |
3676 |
if (!AR) |
| 3677 |
return false; |
3677 |
return false; |
| 3678 |
const SCEV *LoopStep = AR->getStepRecurrence(SE); |
3678 |
const SCEV *LoopStep = AR->getStepRecurrence(SE); |
| 3679 |
if (!isa(LoopStep)) |
3679 |
if (!isa(LoopStep)) |
| 3680 |
return false; |
3680 |
return false; |
| 3681 |
// Check if a post-indexed load/store can be used. |
3681 |
// Check if a post-indexed load/store can be used. |
| 3682 |
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) || |
3682 |
if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) || |
| 3683 |
TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) { |
3683 |
TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) { |
| 3684 |
const SCEV *LoopStart = AR->getStart(); |
3684 |
const SCEV *LoopStart = AR->getStart(); |
| 3685 |
if (!isa(LoopStart) && SE.isLoopInvariant(LoopStart, L)) |
3685 |
if (!isa(LoopStart) && SE.isLoopInvariant(LoopStart, L)) |
| 3686 |
return true; |
3686 |
return true; |
| 3687 |
} |
3687 |
} |
| 3688 |
return false; |
3688 |
return false; |
| 3689 |
} |
3689 |
} |
| 3690 |
|
3690 |
|
| 3691 |
/// Helper function for LSRInstance::GenerateReassociations. |
3691 |
/// Helper function for LSRInstance::GenerateReassociations. |
| 3692 |
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, |
3692 |
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, |
| 3693 |
const Formula &Base, |
3693 |
const Formula &Base, |
| 3694 |
unsigned Depth, size_t Idx, |
3694 |
unsigned Depth, size_t Idx, |
| 3695 |
bool IsScaledReg) { |
3695 |
bool IsScaledReg) { |
| 3696 |
const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; |
3696 |
const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; |
| 3697 |
// Don't generate reassociations for the base register of a value that |
3697 |
// Don't generate reassociations for the base register of a value that |
| 3698 |
// may generate a post-increment operator. The reason is that the |
3698 |
// may generate a post-increment operator. The reason is that the |
| 3699 |
// reassociations cause extra base+register formula to be created, |
3699 |
// reassociations cause extra base+register formula to be created, |
| 3700 |
// and possibly chosen, but the post-increment is more efficient. |
3700 |
// and possibly chosen, but the post-increment is more efficient. |
| 3701 |
if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) |
3701 |
if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE)) |
| 3702 |
return; |
3702 |
return; |
| 3703 |
SmallVector AddOps; |
3703 |
SmallVector AddOps; |
| 3704 |
const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); |
3704 |
const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); |
| 3705 |
if (Remainder) |
3705 |
if (Remainder) |
| 3706 |
AddOps.push_back(Remainder); |
3706 |
AddOps.push_back(Remainder); |
| 3707 |
|
3707 |
|
| 3708 |
if (AddOps.size() == 1) |
3708 |
if (AddOps.size() == 1) |
| 3709 |
return; |
3709 |
return; |
| 3710 |
|
3710 |
|
| 3711 |
for (SmallVectorImpl::const_iterator J = AddOps.begin(), |
3711 |
for (SmallVectorImpl::const_iterator J = AddOps.begin(), |
| 3712 |
JE = AddOps.end(); |
3712 |
JE = AddOps.end(); |
| 3713 |
J != JE; ++J) { |
3713 |
J != JE; ++J) { |
| 3714 |
// Loop-variant "unknown" values are uninteresting; we won't be able to |
3714 |
// Loop-variant "unknown" values are uninteresting; we won't be able to |
| 3715 |
// do anything meaningful with them. |
3715 |
// do anything meaningful with them. |
| 3716 |
if (isa(*J) && !SE.isLoopInvariant(*J, L)) |
3716 |
if (isa(*J) && !SE.isLoopInvariant(*J, L)) |
| 3717 |
continue; |
3717 |
continue; |
| 3718 |
|
3718 |
|
| 3719 |
// Don't pull a constant into a register if the constant could be folded |
3719 |
// Don't pull a constant into a register if the constant could be folded |
| 3720 |
// into an immediate field. |
3720 |
// into an immediate field. |
| 3721 |
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, |
3721 |
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, |
| 3722 |
LU.AccessTy, *J, Base.getNumRegs() > 1)) |
3722 |
LU.AccessTy, *J, Base.getNumRegs() > 1)) |
| 3723 |
continue; |
3723 |
continue; |
| 3724 |
|
3724 |
|
| 3725 |
// Collect all operands except *J. |
3725 |
// Collect all operands except *J. |
| 3726 |
SmallVector InnerAddOps( |
3726 |
SmallVector InnerAddOps( |
| 3727 |
((const SmallVector &)AddOps).begin(), J); |
3727 |
((const SmallVector &)AddOps).begin(), J); |
| 3728 |
InnerAddOps.append(std::next(J), |
3728 |
InnerAddOps.append(std::next(J), |
| 3729 |
((const SmallVector &)AddOps).end()); |
3729 |
((const SmallVector &)AddOps).end()); |
| 3730 |
|
3730 |
|
| 3731 |
// Don't leave just a constant behind in a register if the constant could |
3731 |
// Don't leave just a constant behind in a register if the constant could |
| 3732 |
// be folded into an immediate field. |
3732 |
// be folded into an immediate field. |
| 3733 |
if (InnerAddOps.size() == 1 && |
3733 |
if (InnerAddOps.size() == 1 && |
| 3734 |
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, |
3734 |
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, |
| 3735 |
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) |
3735 |
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) |
| 3736 |
continue; |
3736 |
continue; |
| 3737 |
|
3737 |
|
| 3738 |
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); |
3738 |
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); |
| 3739 |
if (InnerSum->isZero()) |
3739 |
if (InnerSum->isZero()) |
| 3740 |
continue; |
3740 |
continue; |
| 3741 |
Formula F = Base; |
3741 |
Formula F = Base; |
| 3742 |
|
3742 |
|
| 3743 |
// Add the remaining pieces of the add back into the new formula. |
3743 |
// Add the remaining pieces of the add back into the new formula. |
| 3744 |
const SCEVConstant *InnerSumSC = dyn_cast(InnerSum); |
3744 |
const SCEVConstant *InnerSumSC = dyn_cast(InnerSum); |
| 3745 |
if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && |
3745 |
if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && |
| 3746 |
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + |
3746 |
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + |
| 3747 |
InnerSumSC->getValue()->getZExtValue())) { |
3747 |
InnerSumSC->getValue()->getZExtValue())) { |
| 3748 |
F.UnfoldedOffset = |
3748 |
F.UnfoldedOffset = |
| 3749 |
(uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); |
3749 |
(uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); |
| 3750 |
if (IsScaledReg) |
3750 |
if (IsScaledReg) |
| 3751 |
F.ScaledReg = nullptr; |
3751 |
F.ScaledReg = nullptr; |
| 3752 |
else |
3752 |
else |
| 3753 |
F.BaseRegs.erase(F.BaseRegs.begin() + Idx); |
3753 |
F.BaseRegs.erase(F.BaseRegs.begin() + Idx); |
| 3754 |
} else if (IsScaledReg) |
3754 |
} else if (IsScaledReg) |
| 3755 |
F.ScaledReg = InnerSum; |
3755 |
F.ScaledReg = InnerSum; |
| 3756 |
else |
3756 |
else |
| 3757 |
F.BaseRegs[Idx] = InnerSum; |
3757 |
F.BaseRegs[Idx] = InnerSum; |
| 3758 |
|
3758 |
|
| 3759 |
// Add J as its own register, or an unfolded immediate. |
3759 |
// Add J as its own register, or an unfolded immediate. |
| 3760 |
const SCEVConstant *SC = dyn_cast(*J); |
3760 |
const SCEVConstant *SC = dyn_cast(*J); |
| 3761 |
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && |
3761 |
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && |
| 3762 |
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + |
3762 |
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + |
| 3763 |
SC->getValue()->getZExtValue())) |
3763 |
SC->getValue()->getZExtValue())) |
| 3764 |
F.UnfoldedOffset = |
3764 |
F.UnfoldedOffset = |
| 3765 |
(uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); |
3765 |
(uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); |
| 3766 |
else |
3766 |
else |
| 3767 |
F.BaseRegs.push_back(*J); |
3767 |
F.BaseRegs.push_back(*J); |
| 3768 |
// We may have changed the number of register in base regs, adjust the |
3768 |
// We may have changed the number of register in base regs, adjust the |
| 3769 |
// formula accordingly. |
3769 |
// formula accordingly. |
| 3770 |
F.canonicalize(*L); |
3770 |
F.canonicalize(*L); |
| 3771 |
|
3771 |
|
| 3772 |
if (InsertFormula(LU, LUIdx, F)) |
3772 |
if (InsertFormula(LU, LUIdx, F)) |
| 3773 |
// If that formula hadn't been seen before, recurse to find more like |
3773 |
// If that formula hadn't been seen before, recurse to find more like |
| 3774 |
// it. |
3774 |
// it. |
| 3775 |
// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2) |
3775 |
// Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2) |
| 3776 |
// Because just Depth is not enough to bound compile time. |
3776 |
// Because just Depth is not enough to bound compile time. |
| 3777 |
// This means that every time AddOps.size() is greater 16^x we will add |
3777 |
// This means that every time AddOps.size() is greater 16^x we will add |
| 3778 |
// x to Depth. |
3778 |
// x to Depth. |
| 3779 |
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), |
3779 |
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), |
| 3780 |
Depth + 1 + (Log2_32(AddOps.size()) >> 2)); |
3780 |
Depth + 1 + (Log2_32(AddOps.size()) >> 2)); |
| 3781 |
} |
3781 |
} |
| 3782 |
} |
3782 |
} |
| 3783 |
|
3783 |
|
| 3784 |
/// Split out subexpressions from adds and the bases of addrecs. |
3784 |
/// Split out subexpressions from adds and the bases of addrecs. |
| 3785 |
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, |
3785 |
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, |
| 3786 |
Formula Base, unsigned Depth) { |
3786 |
Formula Base, unsigned Depth) { |
| 3787 |
assert(Base.isCanonical(*L) && "Input must be in the canonical form"); |
3787 |
assert(Base.isCanonical(*L) && "Input must be in the canonical form"); |
| 3788 |
// Arbitrarily cap recursion to protect compile time. |
3788 |
// Arbitrarily cap recursion to protect compile time. |
| 3789 |
if (Depth >= 3) |
3789 |
if (Depth >= 3) |
| 3790 |
return; |
3790 |
return; |
| 3791 |
|
3791 |
|
| 3792 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) |
3792 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) |
| 3793 |
GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); |
3793 |
GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); |
| 3794 |
|
3794 |
|
| 3795 |
if (Base.Scale == 1) |
3795 |
if (Base.Scale == 1) |
| 3796 |
GenerateReassociationsImpl(LU, LUIdx, Base, Depth, |
3796 |
GenerateReassociationsImpl(LU, LUIdx, Base, Depth, |
| 3797 |
/* Idx */ -1, /* IsScaledReg */ true); |
3797 |
/* Idx */ -1, /* IsScaledReg */ true); |
| 3798 |
} |
3798 |
} |
| 3799 |
|
3799 |
|
| 3800 |
/// Generate a formula consisting of all of the loop-dominating registers added |
3800 |
/// Generate a formula consisting of all of the loop-dominating registers added |
| 3801 |
/// into a single register. |
3801 |
/// into a single register. |
| 3802 |
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, |
3802 |
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, |
| 3803 |
Formula Base) { |
3803 |
Formula Base) { |
| 3804 |
// This method is only interesting on a plurality of registers. |
3804 |
// This method is only interesting on a plurality of registers. |
| 3805 |
if (Base.BaseRegs.size() + (Base.Scale == 1) + |
3805 |
if (Base.BaseRegs.size() + (Base.Scale == 1) + |
| 3806 |
(Base.UnfoldedOffset != 0) <= 1) |
3806 |
(Base.UnfoldedOffset != 0) <= 1) |
| 3807 |
return; |
3807 |
return; |
| 3808 |
|
3808 |
|
| 3809 |
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before |
3809 |
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before |
| 3810 |
// processing the formula. |
3810 |
// processing the formula. |
| 3811 |
Base.unscale(); |
3811 |
Base.unscale(); |
| 3812 |
SmallVector Ops; |
3812 |
SmallVector Ops; |
| 3813 |
Formula NewBase = Base; |
3813 |
Formula NewBase = Base; |
| 3814 |
NewBase.BaseRegs.clear(); |
3814 |
NewBase.BaseRegs.clear(); |
| 3815 |
Type *CombinedIntegerType = nullptr; |
3815 |
Type *CombinedIntegerType = nullptr; |
| 3816 |
for (const SCEV *BaseReg : Base.BaseRegs) { |
3816 |
for (const SCEV *BaseReg : Base.BaseRegs) { |
| 3817 |
if (SE.properlyDominates(BaseReg, L->getHeader()) && |
3817 |
if (SE.properlyDominates(BaseReg, L->getHeader()) && |
| 3818 |
!SE.hasComputableLoopEvolution(BaseReg, L)) { |
3818 |
!SE.hasComputableLoopEvolution(BaseReg, L)) { |
| 3819 |
if (!CombinedIntegerType) |
3819 |
if (!CombinedIntegerType) |
| 3820 |
CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType()); |
3820 |
CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType()); |
| 3821 |
Ops.push_back(BaseReg); |
3821 |
Ops.push_back(BaseReg); |
| 3822 |
} |
3822 |
} |
| 3823 |
else |
3823 |
else |
| 3824 |
NewBase.BaseRegs.push_back(BaseReg); |
3824 |
NewBase.BaseRegs.push_back(BaseReg); |
| 3825 |
} |
3825 |
} |
| 3826 |
|
3826 |
|
| 3827 |
// If no register is relevant, we're done. |
3827 |
// If no register is relevant, we're done. |
| 3828 |
if (Ops.size() == 0) |
3828 |
if (Ops.size() == 0) |
| 3829 |
return; |
3829 |
return; |
| 3830 |
|
3830 |
|
| 3831 |
// Utility function for generating the required variants of the combined |
3831 |
// Utility function for generating the required variants of the combined |
| 3832 |
// registers. |
3832 |
// registers. |
| 3833 |
auto GenerateFormula = [&](const SCEV *Sum) { |
3833 |
auto GenerateFormula = [&](const SCEV *Sum) { |
| 3834 |
Formula F = NewBase; |
3834 |
Formula F = NewBase; |
| 3835 |
|
3835 |
|
| 3836 |
// TODO: If Sum is zero, it probably means ScalarEvolution missed an |
3836 |
// TODO: If Sum is zero, it probably means ScalarEvolution missed an |
| 3837 |
// opportunity to fold something. For now, just ignore such cases |
3837 |
// opportunity to fold something. For now, just ignore such cases |
| 3838 |
// rather than proceed with zero in a register. |
3838 |
// rather than proceed with zero in a register. |
| 3839 |
if (Sum->isZero()) |
3839 |
if (Sum->isZero()) |
| 3840 |
return; |
3840 |
return; |
| 3841 |
|
3841 |
|
| 3842 |
F.BaseRegs.push_back(Sum); |
3842 |
F.BaseRegs.push_back(Sum); |
| 3843 |
F.canonicalize(*L); |
3843 |
F.canonicalize(*L); |
| 3844 |
(void)InsertFormula(LU, LUIdx, F); |
3844 |
(void)InsertFormula(LU, LUIdx, F); |
| 3845 |
}; |
3845 |
}; |
| 3846 |
|
3846 |
|
| 3847 |
// If we collected at least two registers, generate a formula combining them. |
3847 |
// If we collected at least two registers, generate a formula combining them. |
| 3848 |
if (Ops.size() > 1) { |
3848 |
if (Ops.size() > 1) { |
| 3849 |
SmallVector OpsCopy(Ops); // Don't let SE modify Ops. |
3849 |
SmallVector OpsCopy(Ops); // Don't let SE modify Ops. |
| 3850 |
GenerateFormula(SE.getAddExpr(OpsCopy)); |
3850 |
GenerateFormula(SE.getAddExpr(OpsCopy)); |
| 3851 |
} |
3851 |
} |
| 3852 |
|
3852 |
|
| 3853 |
// If we have an unfolded offset, generate a formula combining it with the |
3853 |
// If we have an unfolded offset, generate a formula combining it with the |
| 3854 |
// registers collected. |
3854 |
// registers collected. |
| 3855 |
if (NewBase.UnfoldedOffset) { |
3855 |
if (NewBase.UnfoldedOffset) { |
| 3856 |
assert(CombinedIntegerType && "Missing a type for the unfolded offset"); |
3856 |
assert(CombinedIntegerType && "Missing a type for the unfolded offset"); |
| 3857 |
Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset, |
3857 |
Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset, |
| 3858 |
true)); |
3858 |
true)); |
| 3859 |
NewBase.UnfoldedOffset = 0; |
3859 |
NewBase.UnfoldedOffset = 0; |
| 3860 |
GenerateFormula(SE.getAddExpr(Ops)); |
3860 |
GenerateFormula(SE.getAddExpr(Ops)); |
| 3861 |
} |
3861 |
} |
| 3862 |
} |
3862 |
} |
| 3863 |
|
3863 |
|
| 3864 |
/// Helper function for LSRInstance::GenerateSymbolicOffsets. |
3864 |
/// Helper function for LSRInstance::GenerateSymbolicOffsets. |
| 3865 |
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, |
3865 |
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, |
| 3866 |
const Formula &Base, size_t Idx, |
3866 |
const Formula &Base, size_t Idx, |
| 3867 |
bool IsScaledReg) { |
3867 |
bool IsScaledReg) { |
| 3868 |
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; |
3868 |
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; |
| 3869 |
GlobalValue *GV = ExtractSymbol(G, SE); |
3869 |
GlobalValue *GV = ExtractSymbol(G, SE); |
| 3870 |
if (G->isZero() || !GV) |
3870 |
if (G->isZero() || !GV) |
| 3871 |
return; |
3871 |
return; |
| 3872 |
Formula F = Base; |
3872 |
Formula F = Base; |
| 3873 |
F.BaseGV = GV; |
3873 |
F.BaseGV = GV; |
| 3874 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) |
3874 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) |
| 3875 |
return; |
3875 |
return; |
| 3876 |
if (IsScaledReg) |
3876 |
if (IsScaledReg) |
| 3877 |
F.ScaledReg = G; |
3877 |
F.ScaledReg = G; |
| 3878 |
else |
3878 |
else |
| 3879 |
F.BaseRegs[Idx] = G; |
3879 |
F.BaseRegs[Idx] = G; |
| 3880 |
(void)InsertFormula(LU, LUIdx, F); |
3880 |
(void)InsertFormula(LU, LUIdx, F); |
| 3881 |
} |
3881 |
} |
| 3882 |
|
3882 |
|
| 3883 |
/// Generate reuse formulae using symbolic offsets. |
3883 |
/// Generate reuse formulae using symbolic offsets. |
| 3884 |
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, |
3884 |
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, |
| 3885 |
Formula Base) { |
3885 |
Formula Base) { |
| 3886 |
// We can't add a symbolic offset if the address already contains one. |
3886 |
// We can't add a symbolic offset if the address already contains one. |
| 3887 |
if (Base.BaseGV) return; |
3887 |
if (Base.BaseGV) return; |
| 3888 |
|
3888 |
|
| 3889 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) |
3889 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) |
| 3890 |
GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); |
3890 |
GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); |
| 3891 |
if (Base.Scale == 1) |
3891 |
if (Base.Scale == 1) |
| 3892 |
GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, |
3892 |
GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, |
| 3893 |
/* IsScaledReg */ true); |
3893 |
/* IsScaledReg */ true); |
| 3894 |
} |
3894 |
} |
| 3895 |
|
3895 |
|
| 3896 |
/// Helper function for LSRInstance::GenerateConstantOffsets. |
3896 |
/// Helper function for LSRInstance::GenerateConstantOffsets. |
| 3897 |
void LSRInstance::GenerateConstantOffsetsImpl( |
3897 |
void LSRInstance::GenerateConstantOffsetsImpl( |
| 3898 |
LSRUse &LU, unsigned LUIdx, const Formula &Base, |
3898 |
LSRUse &LU, unsigned LUIdx, const Formula &Base, |
| 3899 |
const SmallVectorImpl &Worklist, size_t Idx, bool IsScaledReg) { |
3899 |
const SmallVectorImpl &Worklist, size_t Idx, bool IsScaledReg) { |
| 3900 |
|
3900 |
|
| 3901 |
auto GenerateOffset = [&](const SCEV *G, int64_t Offset) { |
3901 |
auto GenerateOffset = [&](const SCEV *G, int64_t Offset) { |
| 3902 |
Formula F = Base; |
3902 |
Formula F = Base; |
| 3903 |
F.BaseOffset = (uint64_t)Base.BaseOffset - Offset; |
3903 |
F.BaseOffset = (uint64_t)Base.BaseOffset - Offset; |
| 3904 |
|
3904 |
|
| 3905 |
if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) { |
3905 |
if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) { |
| 3906 |
// Add the offset to the base register. |
3906 |
// Add the offset to the base register. |
| 3907 |
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G); |
3907 |
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G); |
| 3908 |
// If it cancelled out, drop the base register, otherwise update it. |
3908 |
// If it cancelled out, drop the base register, otherwise update it. |
| 3909 |
if (NewG->isZero()) { |
3909 |
if (NewG->isZero()) { |
| 3910 |
if (IsScaledReg) { |
3910 |
if (IsScaledReg) { |
| 3911 |
F.Scale = 0; |
3911 |
F.Scale = 0; |
| 3912 |
F.ScaledReg = nullptr; |
3912 |
F.ScaledReg = nullptr; |
| 3913 |
} else |
3913 |
} else |
| 3914 |
F.deleteBaseReg(F.BaseRegs[Idx]); |
3914 |
F.deleteBaseReg(F.BaseRegs[Idx]); |
| 3915 |
F.canonicalize(*L); |
3915 |
F.canonicalize(*L); |
| 3916 |
} else if (IsScaledReg) |
3916 |
} else if (IsScaledReg) |
| 3917 |
F.ScaledReg = NewG; |
3917 |
F.ScaledReg = NewG; |
| 3918 |
else |
3918 |
else |
| 3919 |
F.BaseRegs[Idx] = NewG; |
3919 |
F.BaseRegs[Idx] = NewG; |
| 3920 |
|
3920 |
|
| 3921 |
(void)InsertFormula(LU, LUIdx, F); |
3921 |
(void)InsertFormula(LU, LUIdx, F); |
| 3922 |
} |
3922 |
} |
| 3923 |
}; |
3923 |
}; |
| 3924 |
|
3924 |
|
| 3925 |
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; |
3925 |
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; |
| 3926 |
|
3926 |
|
| 3927 |
// With constant offsets and constant steps, we can generate pre-inc |
3927 |
// With constant offsets and constant steps, we can generate pre-inc |
| 3928 |
// accesses by having the offset equal the step. So, for access #0 with a |
3928 |
// accesses by having the offset equal the step. So, for access #0 with a |
| 3929 |
// step of 8, we generate a G - 8 base which would require the first access |
3929 |
// step of 8, we generate a G - 8 base which would require the first access |
| 3930 |
// to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer |
3930 |
// to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer |
| 3931 |
// for itself and hopefully becomes the base for other accesses. This means |
3931 |
// for itself and hopefully becomes the base for other accesses. This means |
| 3932 |
// that a single pre-indexed access can be generated to become the new |
3932 |
// that a single pre-indexed access can be generated to become the new |
| 3933 |
// base pointer for each iteration of the loop, resulting in no extra add/sub |
3933 |
// base pointer for each iteration of the loop, resulting in no extra add/sub |
| 3934 |
// instructions for pointer updating. |
3934 |
// instructions for pointer updating. |
| 3935 |
if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) { |
3935 |
if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) { |
| 3936 |
if (auto *GAR = dyn_cast(G)) { |
3936 |
if (auto *GAR = dyn_cast(G)) { |
| 3937 |
if (auto *StepRec = |
3937 |
if (auto *StepRec = |
| 3938 |
dyn_cast(GAR->getStepRecurrence(SE))) { |
3938 |
dyn_cast(GAR->getStepRecurrence(SE))) { |
| 3939 |
const APInt &StepInt = StepRec->getAPInt(); |
3939 |
const APInt &StepInt = StepRec->getAPInt(); |
| 3940 |
int64_t Step = StepInt.isNegative() ? |
3940 |
int64_t Step = StepInt.isNegative() ? |
| 3941 |
StepInt.getSExtValue() : StepInt.getZExtValue(); |
3941 |
StepInt.getSExtValue() : StepInt.getZExtValue(); |
| 3942 |
|
3942 |
|
| 3943 |
for (int64_t Offset : Worklist) { |
3943 |
for (int64_t Offset : Worklist) { |
| 3944 |
Offset -= Step; |
3944 |
Offset -= Step; |
| 3945 |
GenerateOffset(G, Offset); |
3945 |
GenerateOffset(G, Offset); |
| 3946 |
} |
3946 |
} |
| 3947 |
} |
3947 |
} |
| 3948 |
} |
3948 |
} |
| 3949 |
} |
3949 |
} |
| 3950 |
for (int64_t Offset : Worklist) |
3950 |
for (int64_t Offset : Worklist) |
| 3951 |
GenerateOffset(G, Offset); |
3951 |
GenerateOffset(G, Offset); |
| 3952 |
|
3952 |
|
| 3953 |
int64_t Imm = ExtractImmediate(G, SE); |
3953 |
int64_t Imm = ExtractImmediate(G, SE); |
| 3954 |
if (G->isZero() || Imm == 0) |
3954 |
if (G->isZero() || Imm == 0) |
| 3955 |
return; |
3955 |
return; |
| 3956 |
Formula F = Base; |
3956 |
Formula F = Base; |
| 3957 |
F.BaseOffset = (uint64_t)F.BaseOffset + Imm; |
3957 |
F.BaseOffset = (uint64_t)F.BaseOffset + Imm; |
| 3958 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) |
3958 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) |
| 3959 |
return; |
3959 |
return; |
| 3960 |
if (IsScaledReg) { |
3960 |
if (IsScaledReg) { |
| 3961 |
F.ScaledReg = G; |
3961 |
F.ScaledReg = G; |
| 3962 |
} else { |
3962 |
} else { |
| 3963 |
F.BaseRegs[Idx] = G; |
3963 |
F.BaseRegs[Idx] = G; |
| 3964 |
// We may generate non canonical Formula if G is a recurrent expr reg |
3964 |
// We may generate non canonical Formula if G is a recurrent expr reg |
| 3965 |
// related with current loop while F.ScaledReg is not. |
3965 |
// related with current loop while F.ScaledReg is not. |
| 3966 |
F.canonicalize(*L); |
3966 |
F.canonicalize(*L); |
| 3967 |
} |
3967 |
} |
| 3968 |
(void)InsertFormula(LU, LUIdx, F); |
3968 |
(void)InsertFormula(LU, LUIdx, F); |
| 3969 |
} |
3969 |
} |
| 3970 |
|
3970 |
|
| 3971 |
/// GenerateConstantOffsets - Generate reuse formulae using constant offsets. |
3971 |
/// GenerateConstantOffsets - Generate reuse formulae using constant offsets. |
| 3972 |
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, |
3972 |
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, |
| 3973 |
Formula Base) { |
3973 |
Formula Base) { |
| 3974 |
// TODO: For now, just add the min and max offset, because it usually isn't |
3974 |
// TODO: For now, just add the min and max offset, because it usually isn't |
| 3975 |
// worthwhile looking at everything in between. |
3975 |
// worthwhile looking at everything in between. |
| 3976 |
SmallVector Worklist; |
3976 |
SmallVector Worklist; |
| 3977 |
Worklist.push_back(LU.MinOffset); |
3977 |
Worklist.push_back(LU.MinOffset); |
| 3978 |
if (LU.MaxOffset != LU.MinOffset) |
3978 |
if (LU.MaxOffset != LU.MinOffset) |
| 3979 |
Worklist.push_back(LU.MaxOffset); |
3979 |
Worklist.push_back(LU.MaxOffset); |
| 3980 |
|
3980 |
|
| 3981 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) |
3981 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) |
| 3982 |
GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i); |
3982 |
GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i); |
| 3983 |
if (Base.Scale == 1) |
3983 |
if (Base.Scale == 1) |
| 3984 |
GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1, |
3984 |
GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1, |
| 3985 |
/* IsScaledReg */ true); |
3985 |
/* IsScaledReg */ true); |
| 3986 |
} |
3986 |
} |
| 3987 |
|
3987 |
|
| 3988 |
/// For ICmpZero, check to see if we can scale up the comparison. For example, x |
3988 |
/// For ICmpZero, check to see if we can scale up the comparison. For example, x |
| 3989 |
/// == y -> x*c == y*c. |
3989 |
/// == y -> x*c == y*c. |
| 3990 |
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, |
3990 |
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, |
| 3991 |
Formula Base) { |
3991 |
Formula Base) { |
| 3992 |
if (LU.Kind != LSRUse::ICmpZero) return; |
3992 |
if (LU.Kind != LSRUse::ICmpZero) return; |
| 3993 |
|
3993 |
|
| 3994 |
// Determine the integer type for the base formula. |
3994 |
// Determine the integer type for the base formula. |
| 3995 |
Type *IntTy = Base.getType(); |
3995 |
Type *IntTy = Base.getType(); |
| 3996 |
if (!IntTy) return; |
3996 |
if (!IntTy) return; |
| 3997 |
if (SE.getTypeSizeInBits(IntTy) > 64) return; |
3997 |
if (SE.getTypeSizeInBits(IntTy) > 64) return; |
| 3998 |
|
3998 |
|
| 3999 |
// Don't do this if there is more than one offset. |
3999 |
// Don't do this if there is more than one offset. |
| 4000 |
if (LU.MinOffset != LU.MaxOffset) return; |
4000 |
if (LU.MinOffset != LU.MaxOffset) return; |
| 4001 |
|
4001 |
|
| 4002 |
// Check if transformation is valid. It is illegal to multiply a pointer. |
4002 |
// Check if transformation is valid. It is illegal to multiply a pointer. |
| 4003 |
if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy()) |
4003 |
if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy()) |
| 4004 |
return; |
4004 |
return; |
| 4005 |
for (const SCEV *BaseReg : Base.BaseRegs) |
4005 |
for (const SCEV *BaseReg : Base.BaseRegs) |
| 4006 |
if (BaseReg->getType()->isPointerTy()) |
4006 |
if (BaseReg->getType()->isPointerTy()) |
| 4007 |
return; |
4007 |
return; |
| 4008 |
assert(!Base.BaseGV && "ICmpZero use is not legal!"); |
4008 |
assert(!Base.BaseGV && "ICmpZero use is not legal!"); |
| 4009 |
|
4009 |
|
| 4010 |
// Check each interesting stride. |
4010 |
// Check each interesting stride. |
| 4011 |
for (int64_t Factor : Factors) { |
4011 |
for (int64_t Factor : Factors) { |
| 4012 |
// Check that Factor can be represented by IntTy |
4012 |
// Check that Factor can be represented by IntTy |
| 4013 |
if (!ConstantInt::isValueValidForType(IntTy, Factor)) |
4013 |
if (!ConstantInt::isValueValidForType(IntTy, Factor)) |
| 4014 |
continue; |
4014 |
continue; |
| 4015 |
// Check that the multiplication doesn't overflow. |
4015 |
// Check that the multiplication doesn't overflow. |
| 4016 |
if (Base.BaseOffset == std::numeric_limits::min() && Factor == -1) |
4016 |
if (Base.BaseOffset == std::numeric_limits::min() && Factor == -1) |
| 4017 |
continue; |
4017 |
continue; |
| 4018 |
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; |
4018 |
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; |
| 4019 |
assert(Factor != 0 && "Zero factor not expected!"); |
4019 |
assert(Factor != 0 && "Zero factor not expected!"); |
| 4020 |
if (NewBaseOffset / Factor != Base.BaseOffset) |
4020 |
if (NewBaseOffset / Factor != Base.BaseOffset) |
| 4021 |
continue; |
4021 |
continue; |
| 4022 |
// If the offset will be truncated at this use, check that it is in bounds. |
4022 |
// If the offset will be truncated at this use, check that it is in bounds. |
| 4023 |
if (!IntTy->isPointerTy() && |
4023 |
if (!IntTy->isPointerTy() && |
| 4024 |
!ConstantInt::isValueValidForType(IntTy, NewBaseOffset)) |
4024 |
!ConstantInt::isValueValidForType(IntTy, NewBaseOffset)) |
| 4025 |
continue; |
4025 |
continue; |
| 4026 |
|
4026 |
|
| 4027 |
// Check that multiplying with the use offset doesn't overflow. |
4027 |
// Check that multiplying with the use offset doesn't overflow. |
| 4028 |
int64_t Offset = LU.MinOffset; |
4028 |
int64_t Offset = LU.MinOffset; |
| 4029 |
if (Offset == std::numeric_limits::min() && Factor == -1) |
4029 |
if (Offset == std::numeric_limits::min() && Factor == -1) |
| 4030 |
continue; |
4030 |
continue; |
| 4031 |
Offset = (uint64_t)Offset * Factor; |
4031 |
Offset = (uint64_t)Offset * Factor; |
| 4032 |
if (Offset / Factor != LU.MinOffset) |
4032 |
if (Offset / Factor != LU.MinOffset) |
| 4033 |
continue; |
4033 |
continue; |
| 4034 |
// If the offset will be truncated at this use, check that it is in bounds. |
4034 |
// If the offset will be truncated at this use, check that it is in bounds. |
| 4035 |
if (!IntTy->isPointerTy() && |
4035 |
if (!IntTy->isPointerTy() && |
| 4036 |
!ConstantInt::isValueValidForType(IntTy, Offset)) |
4036 |
!ConstantInt::isValueValidForType(IntTy, Offset)) |
| 4037 |
continue; |
4037 |
continue; |
| 4038 |
|
4038 |
|
| 4039 |
Formula F = Base; |
4039 |
Formula F = Base; |
| 4040 |
F.BaseOffset = NewBaseOffset; |
4040 |
F.BaseOffset = NewBaseOffset; |
| 4041 |
|
4041 |
|
| 4042 |
// Check that this scale is legal. |
4042 |
// Check that this scale is legal. |
| 4043 |
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F)) |
4043 |
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F)) |
| 4044 |
continue; |
4044 |
continue; |
| 4045 |
|
4045 |
|
| 4046 |
// Compensate for the use having MinOffset built into it. |
4046 |
// Compensate for the use having MinOffset built into it. |
| 4047 |
F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset; |
4047 |
F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset; |
| 4048 |
|
4048 |
|
| 4049 |
const SCEV *FactorS = SE.getConstant(IntTy, Factor); |
4049 |
const SCEV *FactorS = SE.getConstant(IntTy, Factor); |
| 4050 |
|
4050 |
|
| 4051 |
// Check that multiplying with each base register doesn't overflow. |
4051 |
// Check that multiplying with each base register doesn't overflow. |
| 4052 |
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { |
4052 |
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { |
| 4053 |
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS); |
4053 |
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS); |
| 4054 |
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i]) |
4054 |
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i]) |
| 4055 |
goto next; |
4055 |
goto next; |
| 4056 |
} |
4056 |
} |
| 4057 |
|
4057 |
|
| 4058 |
// Check that multiplying with the scaled register doesn't overflow. |
4058 |
// Check that multiplying with the scaled register doesn't overflow. |
| 4059 |
if (F.ScaledReg) { |
4059 |
if (F.ScaledReg) { |
| 4060 |
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS); |
4060 |
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS); |
| 4061 |
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg) |
4061 |
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg) |
| 4062 |
continue; |
4062 |
continue; |
| 4063 |
} |
4063 |
} |
| 4064 |
|
4064 |
|
| 4065 |
// Check that multiplying with the unfolded offset doesn't overflow. |
4065 |
// Check that multiplying with the unfolded offset doesn't overflow. |
| 4066 |
if (F.UnfoldedOffset != 0) { |
4066 |
if (F.UnfoldedOffset != 0) { |
| 4067 |
if (F.UnfoldedOffset == std::numeric_limits::min() && |
4067 |
if (F.UnfoldedOffset == std::numeric_limits::min() && |
| 4068 |
Factor == -1) |
4068 |
Factor == -1) |
| 4069 |
continue; |
4069 |
continue; |
| 4070 |
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor; |
4070 |
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor; |
| 4071 |
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset) |
4071 |
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset) |
| 4072 |
continue; |
4072 |
continue; |
| 4073 |
// If the offset will be truncated, check that it is in bounds. |
4073 |
// If the offset will be truncated, check that it is in bounds. |
| 4074 |
if (!IntTy->isPointerTy() && |
4074 |
if (!IntTy->isPointerTy() && |
| 4075 |
!ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset)) |
4075 |
!ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset)) |
| 4076 |
continue; |
4076 |
continue; |
| 4077 |
} |
4077 |
} |
| 4078 |
|
4078 |
|
| 4079 |
// If we make it here and it's legal, add it. |
4079 |
// If we make it here and it's legal, add it. |
| 4080 |
(void)InsertFormula(LU, LUIdx, F); |
4080 |
(void)InsertFormula(LU, LUIdx, F); |
| 4081 |
next:; |
4081 |
next:; |
| 4082 |
} |
4082 |
} |
| 4083 |
} |
4083 |
} |
| 4084 |
|
4084 |
|
| 4085 |
/// Generate stride factor reuse formulae by making use of scaled-offset address |
4085 |
/// Generate stride factor reuse formulae by making use of scaled-offset address |
| 4086 |
/// modes, for example. |
4086 |
/// modes, for example. |
| 4087 |
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { |
4087 |
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { |
| 4088 |
// Determine the integer type for the base formula. |
4088 |
// Determine the integer type for the base formula. |
| 4089 |
Type *IntTy = Base.getType(); |
4089 |
Type *IntTy = Base.getType(); |
| 4090 |
if (!IntTy) return; |
4090 |
if (!IntTy) return; |
| 4091 |
|
4091 |
|
| 4092 |
// If this Formula already has a scaled register, we can't add another one. |
4092 |
// If this Formula already has a scaled register, we can't add another one. |
| 4093 |
// Try to unscale the formula to generate a better scale. |
4093 |
// Try to unscale the formula to generate a better scale. |
| 4094 |
if (Base.Scale != 0 && !Base.unscale()) |
4094 |
if (Base.Scale != 0 && !Base.unscale()) |
| 4095 |
return; |
4095 |
return; |
| 4096 |
|
4096 |
|
| 4097 |
assert(Base.Scale == 0 && "unscale did not did its job!"); |
4097 |
assert(Base.Scale == 0 && "unscale did not did its job!"); |
| 4098 |
|
4098 |
|
| 4099 |
// Check each interesting stride. |
4099 |
// Check each interesting stride. |
| 4100 |
for (int64_t Factor : Factors) { |
4100 |
for (int64_t Factor : Factors) { |
| 4101 |
Base.Scale = Factor; |
4101 |
Base.Scale = Factor; |
| 4102 |
Base.HasBaseReg = Base.BaseRegs.size() > 1; |
4102 |
Base.HasBaseReg = Base.BaseRegs.size() > 1; |
| 4103 |
// Check whether this scale is going to be legal. |
4103 |
// Check whether this scale is going to be legal. |
| 4104 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, |
4104 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, |
| 4105 |
Base)) { |
4105 |
Base)) { |
| 4106 |
// As a special-case, handle special out-of-loop Basic users specially. |
4106 |
// As a special-case, handle special out-of-loop Basic users specially. |
| 4107 |
// TODO: Reconsider this special case. |
4107 |
// TODO: Reconsider this special case. |
| 4108 |
if (LU.Kind == LSRUse::Basic && |
4108 |
if (LU.Kind == LSRUse::Basic && |
| 4109 |
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special, |
4109 |
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special, |
| 4110 |
LU.AccessTy, Base) && |
4110 |
LU.AccessTy, Base) && |
| 4111 |
LU.AllFixupsOutsideLoop) |
4111 |
LU.AllFixupsOutsideLoop) |
| 4112 |
LU.Kind = LSRUse::Special; |
4112 |
LU.Kind = LSRUse::Special; |
| 4113 |
else |
4113 |
else |
| 4114 |
continue; |
4114 |
continue; |
| 4115 |
} |
4115 |
} |
| 4116 |
// For an ICmpZero, negating a solitary base register won't lead to |
4116 |
// For an ICmpZero, negating a solitary base register won't lead to |
| 4117 |
// new solutions. |
4117 |
// new solutions. |
| 4118 |
if (LU.Kind == LSRUse::ICmpZero && |
4118 |
if (LU.Kind == LSRUse::ICmpZero && |
| 4119 |
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV) |
4119 |
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV) |
| 4120 |
continue; |
4120 |
continue; |
| 4121 |
// For each addrec base reg, if its loop is current loop, apply the scale. |
4121 |
// For each addrec base reg, if its loop is current loop, apply the scale. |
| 4122 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { |
4122 |
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { |
| 4123 |
const SCEVAddRecExpr *AR = dyn_cast(Base.BaseRegs[i]); |
4123 |
const SCEVAddRecExpr *AR = dyn_cast(Base.BaseRegs[i]); |
| 4124 |
if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) { |
4124 |
if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) { |
| 4125 |
const SCEV *FactorS = SE.getConstant(IntTy, Factor); |
4125 |
const SCEV *FactorS = SE.getConstant(IntTy, Factor); |
| 4126 |
if (FactorS->isZero()) |
4126 |
if (FactorS->isZero()) |
| 4127 |
continue; |
4127 |
continue; |
| 4128 |
// Divide out the factor, ignoring high bits, since we'll be |
4128 |
// Divide out the factor, ignoring high bits, since we'll be |
| 4129 |
// scaling the value back up in the end. |
4129 |
// scaling the value back up in the end. |
| 4130 |
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) |
4130 |
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) |
| 4131 |
if (!Quotient->isZero()) { |
4131 |
if (!Quotient->isZero()) { |
| 4132 |
// TODO: This could be optimized to avoid all the copying. |
4132 |
// TODO: This could be optimized to avoid all the copying. |
| 4133 |
Formula F = Base; |
4133 |
Formula F = Base; |
| 4134 |
F.ScaledReg = Quotient; |
4134 |
F.ScaledReg = Quotient; |
| 4135 |
F.deleteBaseReg(F.BaseRegs[i]); |
4135 |
F.deleteBaseReg(F.BaseRegs[i]); |
| 4136 |
// The canonical representation of 1*reg is reg, which is already in |
4136 |
// The canonical representation of 1*reg is reg, which is already in |
| 4137 |
// Base. In that case, do not try to insert the formula, it will be |
4137 |
// Base. In that case, do not try to insert the formula, it will be |
| 4138 |
// rejected anyway. |
4138 |
// rejected anyway. |
| 4139 |
if (F.Scale == 1 && (F.BaseRegs.empty() || |
4139 |
if (F.Scale == 1 && (F.BaseRegs.empty() || |
| 4140 |
(AR->getLoop() != L && LU.AllFixupsOutsideLoop))) |
4140 |
(AR->getLoop() != L && LU.AllFixupsOutsideLoop))) |
| 4141 |
continue; |
4141 |
continue; |
| 4142 |
// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate |
4142 |
// If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate |
| 4143 |
// non canonical Formula with ScaledReg's loop not being L. |
4143 |
// non canonical Formula with ScaledReg's loop not being L. |
| 4144 |
if (F.Scale == 1 && LU.AllFixupsOutsideLoop) |
4144 |
if (F.Scale == 1 && LU.AllFixupsOutsideLoop) |
| 4145 |
F.canonicalize(*L); |
4145 |
F.canonicalize(*L); |
| 4146 |
(void)InsertFormula(LU, LUIdx, F); |
4146 |
(void)InsertFormula(LU, LUIdx, F); |
| 4147 |
} |
4147 |
} |
| 4148 |
} |
4148 |
} |
| 4149 |
} |
4149 |
} |
| 4150 |
} |
4150 |
} |
| 4151 |
} |
4151 |
} |
| 4152 |
|
4152 |
|
| 4153 |
/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops. |
4153 |
/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops. |
| 4154 |
/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then |
4154 |
/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then |
| 4155 |
/// perform the extension/truncate and normalize again, as the normalized form |
4155 |
/// perform the extension/truncate and normalize again, as the normalized form |
| 4156 |
/// can result in folds that are not valid in the post-inc use contexts. The |
4156 |
/// can result in folds that are not valid in the post-inc use contexts. The |
| 4157 |
/// expressions for all PostIncLoopSets must match, otherwise return nullptr. |
4157 |
/// expressions for all PostIncLoopSets must match, otherwise return nullptr. |
| 4158 |
static const SCEV * |
4158 |
static const SCEV * |
| 4159 |
getAnyExtendConsideringPostIncUses(ArrayRef Loops, |
4159 |
getAnyExtendConsideringPostIncUses(ArrayRef Loops, |
| 4160 |
const SCEV *Expr, Type *ToTy, |
4160 |
const SCEV *Expr, Type *ToTy, |
| 4161 |
ScalarEvolution &SE) { |
4161 |
ScalarEvolution &SE) { |
| 4162 |
const SCEV *Result = nullptr; |
4162 |
const SCEV *Result = nullptr; |
| 4163 |
for (auto &L : Loops) { |
4163 |
for (auto &L : Loops) { |
| 4164 |
auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE); |
4164 |
auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE); |
| 4165 |
const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy); |
4165 |
const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy); |
| 4166 |
const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE); |
4166 |
const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE); |
| 4167 |
if (!New || (Result && New != Result)) |
4167 |
if (!New || (Result && New != Result)) |
| 4168 |
return nullptr; |
4168 |
return nullptr; |
| 4169 |
Result = New; |
4169 |
Result = New; |
| 4170 |
} |
4170 |
} |
| 4171 |
|
4171 |
|
| 4172 |
assert(Result && "failed to create expression"); |
4172 |
assert(Result && "failed to create expression"); |
| 4173 |
return Result; |
4173 |
return Result; |
| 4174 |
} |
4174 |
} |
| 4175 |
|
4175 |
|
| 4176 |
/// Generate reuse formulae from different IV types. |
4176 |
/// Generate reuse formulae from different IV types. |
| 4177 |
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { |
4177 |
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { |
| 4178 |
// Don't bother truncating symbolic values. |
4178 |
// Don't bother truncating symbolic values. |
| 4179 |
if (Base.BaseGV) return; |
4179 |
if (Base.BaseGV) return; |
| 4180 |
|
4180 |
|
| 4181 |
// Determine the integer type for the base formula. |
4181 |
// Determine the integer type for the base formula. |
| 4182 |
Type *DstTy = Base.getType(); |
4182 |
Type *DstTy = Base.getType(); |
| 4183 |
if (!DstTy) return; |
4183 |
if (!DstTy) return; |
| 4184 |
if (DstTy->isPointerTy()) |
4184 |
if (DstTy->isPointerTy()) |
| 4185 |
return; |
4185 |
return; |
| 4186 |
|
4186 |
|
| 4187 |
// It is invalid to extend a pointer type so exit early if ScaledReg or |
4187 |
// It is invalid to extend a pointer type so exit early if ScaledReg or |
| 4188 |
// any of the BaseRegs are pointers. |
4188 |
// any of the BaseRegs are pointers. |
| 4189 |
if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy()) |
4189 |
if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy()) |
| 4190 |
return; |
4190 |
return; |
| 4191 |
if (any_of(Base.BaseRegs, |
4191 |
if (any_of(Base.BaseRegs, |
| 4192 |
[](const SCEV *S) { return S->getType()->isPointerTy(); })) |
4192 |
[](const SCEV *S) { return S->getType()->isPointerTy(); })) |
| 4193 |
return; |
4193 |
return; |
| 4194 |
|
4194 |
|
| 4195 |
SmallVector Loops; |
4195 |
SmallVector Loops; |
| 4196 |
for (auto &LF : LU.Fixups) |
4196 |
for (auto &LF : LU.Fixups) |
| 4197 |
Loops.push_back(LF.PostIncLoops); |
4197 |
Loops.push_back(LF.PostIncLoops); |
| 4198 |
|
4198 |
|
| 4199 |
for (Type *SrcTy : Types) { |
4199 |
for (Type *SrcTy : Types) { |
| 4200 |
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { |
4200 |
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { |
| 4201 |
Formula F = Base; |
4201 |
Formula F = Base; |
| 4202 |
|
4202 |
|
| 4203 |
// Sometimes SCEV is able to prove zero during ext transform. It may |
4203 |
// Sometimes SCEV is able to prove zero during ext transform. It may |
| 4204 |
// happen if SCEV did not do all possible transforms while creating the |
4204 |
// happen if SCEV did not do all possible transforms while creating the |
| 4205 |
// initial node (maybe due to depth limitations), but it can do them while |
4205 |
// initial node (maybe due to depth limitations), but it can do them while |
| 4206 |
// taking ext. |
4206 |
// taking ext. |
| 4207 |
if (F.ScaledReg) { |
4207 |
if (F.ScaledReg) { |
| 4208 |
const SCEV *NewScaledReg = |
4208 |
const SCEV *NewScaledReg = |
| 4209 |
getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE); |
4209 |
getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE); |
| 4210 |
if (!NewScaledReg || NewScaledReg->isZero()) |
4210 |
if (!NewScaledReg || NewScaledReg->isZero()) |
| 4211 |
continue; |
4211 |
continue; |
| 4212 |
F.ScaledReg = NewScaledReg; |
4212 |
F.ScaledReg = NewScaledReg; |
| 4213 |
} |
4213 |
} |
| 4214 |
bool HasZeroBaseReg = false; |
4214 |
bool HasZeroBaseReg = false; |
| 4215 |
for (const SCEV *&BaseReg : F.BaseRegs) { |
4215 |
for (const SCEV *&BaseReg : F.BaseRegs) { |
| 4216 |
const SCEV *NewBaseReg = |
4216 |
const SCEV *NewBaseReg = |
| 4217 |
getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE); |
4217 |
getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE); |
| 4218 |
if (!NewBaseReg || NewBaseReg->isZero()) { |
4218 |
if (!NewBaseReg || NewBaseReg->isZero()) { |
| 4219 |
HasZeroBaseReg = true; |
4219 |
HasZeroBaseReg = true; |
| 4220 |
break; |
4220 |
break; |
| 4221 |
} |
4221 |
} |
| 4222 |
BaseReg = NewBaseReg; |
4222 |
BaseReg = NewBaseReg; |
| 4223 |
} |
4223 |
} |
| 4224 |
if (HasZeroBaseReg) |
4224 |
if (HasZeroBaseReg) |
| 4225 |
continue; |
4225 |
continue; |
| 4226 |
|
4226 |
|
| 4227 |
// TODO: This assumes we've done basic processing on all uses and |
4227 |
// TODO: This assumes we've done basic processing on all uses and |
| 4228 |
// have an idea what the register usage is. |
4228 |
// have an idea what the register usage is. |
| 4229 |
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) |
4229 |
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) |
| 4230 |
continue; |
4230 |
continue; |
| 4231 |
|
4231 |
|
| 4232 |
F.canonicalize(*L); |
4232 |
F.canonicalize(*L); |
| 4233 |
(void)InsertFormula(LU, LUIdx, F); |
4233 |
(void)InsertFormula(LU, LUIdx, F); |
| 4234 |
} |
4234 |
} |
| 4235 |
} |
4235 |
} |
| 4236 |
} |
4236 |
} |
| 4237 |
|
4237 |
|
| 4238 |
namespace { |
4238 |
namespace { |
| 4239 |
|
4239 |
|
| 4240 |
/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer |
4240 |
/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer |
| 4241 |
/// modifications so that the search phase doesn't have to worry about the data |
4241 |
/// modifications so that the search phase doesn't have to worry about the data |
| 4242 |
/// structures moving underneath it. |
4242 |
/// structures moving underneath it. |
| 4243 |
struct WorkItem { |
4243 |
struct WorkItem { |
| 4244 |
size_t LUIdx; |
4244 |
size_t LUIdx; |
| 4245 |
int64_t Imm; |
4245 |
int64_t Imm; |
| 4246 |
const SCEV *OrigReg; |
4246 |
const SCEV *OrigReg; |
| 4247 |
|
4247 |
|
| 4248 |
WorkItem(size_t LI, int64_t I, const SCEV *R) |
4248 |
WorkItem(size_t LI, int64_t I, const SCEV *R) |
| 4249 |
: LUIdx(LI), Imm(I), OrigReg(R) {} |
4249 |
: LUIdx(LI), Imm(I), OrigReg(R) {} |
| 4250 |
|
4250 |
|
| 4251 |
void print(raw_ostream &OS) const; |
4251 |
void print(raw_ostream &OS) const; |
| 4252 |
void dump() const; |
4252 |
void dump() const; |
| 4253 |
}; |
4253 |
}; |
| 4254 |
|
4254 |
|
| 4255 |
} // end anonymous namespace |
4255 |
} // end anonymous namespace |
| 4256 |
|
4256 |
|
| 4257 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
4257 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 4258 |
void WorkItem::print(raw_ostream &OS) const { |
4258 |
void WorkItem::print(raw_ostream &OS) const { |
| 4259 |
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx |
4259 |
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx |
| 4260 |
<< " , add offset " << Imm; |
4260 |
<< " , add offset " << Imm; |
| 4261 |
} |
4261 |
} |
| 4262 |
|
4262 |
|
| 4263 |
LLVM_DUMP_METHOD void WorkItem::dump() const { |
4263 |
LLVM_DUMP_METHOD void WorkItem::dump() const { |
| 4264 |
print(errs()); errs() << '\n'; |
4264 |
print(errs()); errs() << '\n'; |
| 4265 |
} |
4265 |
} |
| 4266 |
#endif |
4266 |
#endif |
| 4267 |
|
4267 |
|
| 4268 |
/// Look for registers which are a constant distance apart and try to form reuse |
4268 |
/// Look for registers which are a constant distance apart and try to form reuse |
| 4269 |
/// opportunities between them. |
4269 |
/// opportunities between them. |
| 4270 |
void LSRInstance::GenerateCrossUseConstantOffsets() { |
4270 |
void LSRInstance::GenerateCrossUseConstantOffsets() { |
| 4271 |
// Group the registers by their value without any added constant offset. |
4271 |
// Group the registers by their value without any added constant offset. |
| 4272 |
using ImmMapTy = std::map; |
4272 |
using ImmMapTy = std::map; |
| 4273 |
|
4273 |
|
| 4274 |
DenseMap Map; |
4274 |
DenseMap Map; |
| 4275 |
DenseMap UsedByIndicesMap; |
4275 |
DenseMap UsedByIndicesMap; |
| 4276 |
SmallVector Sequence; |
4276 |
SmallVector Sequence; |
| 4277 |
for (const SCEV *Use : RegUses) { |
4277 |
for (const SCEV *Use : RegUses) { |
| 4278 |
const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify. |
4278 |
const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify. |
| 4279 |
int64_t Imm = ExtractImmediate(Reg, SE); |
4279 |
int64_t Imm = ExtractImmediate(Reg, SE); |
| 4280 |
auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy())); |
4280 |
auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy())); |
| 4281 |
if (Pair.second) |
4281 |
if (Pair.second) |
| 4282 |
Sequence.push_back(Reg); |
4282 |
Sequence.push_back(Reg); |
| 4283 |
Pair.first->second.insert(std::make_pair(Imm, Use)); |
4283 |
Pair.first->second.insert(std::make_pair(Imm, Use)); |
| 4284 |
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use); |
4284 |
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use); |
| 4285 |
} |
4285 |
} |
| 4286 |
|
4286 |
|
| 4287 |
// Now examine each set of registers with the same base value. Build up |
4287 |
// Now examine each set of registers with the same base value. Build up |
| 4288 |
// a list of work to do and do the work in a separate step so that we're |
4288 |
// a list of work to do and do the work in a separate step so that we're |
| 4289 |
// not adding formulae and register counts while we're searching. |
4289 |
// not adding formulae and register counts while we're searching. |
| 4290 |
SmallVector WorkItems; |
4290 |
SmallVector WorkItems; |
| 4291 |
SmallSet, 32> UniqueItems; |
4291 |
SmallSet, 32> UniqueItems; |
| 4292 |
for (const SCEV *Reg : Sequence) { |
4292 |
for (const SCEV *Reg : Sequence) { |
| 4293 |
const ImmMapTy &Imms = Map.find(Reg)->second; |
4293 |
const ImmMapTy &Imms = Map.find(Reg)->second; |
| 4294 |
|
4294 |
|
| 4295 |
// It's not worthwhile looking for reuse if there's only one offset. |
4295 |
// It's not worthwhile looking for reuse if there's only one offset. |
| 4296 |
if (Imms.size() == 1) |
4296 |
if (Imms.size() == 1) |
| 4297 |
continue; |
4297 |
continue; |
| 4298 |
|
4298 |
|
| 4299 |
LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':'; |
4299 |
LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':'; |
| 4300 |
for (const auto &Entry |
4300 |
for (const auto &Entry |
| 4301 |
: Imms) dbgs() |
4301 |
: Imms) dbgs() |
| 4302 |
<< ' ' << Entry.first; |
4302 |
<< ' ' << Entry.first; |
| 4303 |
dbgs() << '\n'); |
4303 |
dbgs() << '\n'); |
| 4304 |
|
4304 |
|
| 4305 |
// Examine each offset. |
4305 |
// Examine each offset. |
| 4306 |
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end(); |
4306 |
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end(); |
| 4307 |
J != JE; ++J) { |
4307 |
J != JE; ++J) { |
| 4308 |
const SCEV *OrigReg = J->second; |
4308 |
const SCEV *OrigReg = J->second; |
| 4309 |
|
4309 |
|
| 4310 |
int64_t JImm = J->first; |
4310 |
int64_t JImm = J->first; |
| 4311 |
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg); |
4311 |
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg); |
| 4312 |
|
4312 |
|
| 4313 |
if (!isa(OrigReg) && |
4313 |
if (!isa(OrigReg) && |
| 4314 |
UsedByIndicesMap[Reg].count() == 1) { |
4314 |
UsedByIndicesMap[Reg].count() == 1) { |
| 4315 |
LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg |
4315 |
LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg |
| 4316 |
<< '\n'); |
4316 |
<< '\n'); |
| 4317 |
continue; |
4317 |
continue; |
| 4318 |
} |
4318 |
} |
| 4319 |
|
4319 |
|
| 4320 |
// Conservatively examine offsets between this orig reg a few selected |
4320 |
// Conservatively examine offsets between this orig reg a few selected |
| 4321 |
// other orig regs. |
4321 |
// other orig regs. |
| 4322 |
int64_t First = Imms.begin()->first; |
4322 |
int64_t First = Imms.begin()->first; |
| 4323 |
int64_t Last = std::prev(Imms.end())->first; |
4323 |
int64_t Last = std::prev(Imms.end())->first; |
| 4324 |
// Compute (First + Last) / 2 without overflow using the fact that |
4324 |
// Compute (First + Last) / 2 without overflow using the fact that |
| 4325 |
// First + Last = 2 * (First + Last) + (First ^ Last). |
4325 |
// First + Last = 2 * (First + Last) + (First ^ Last). |
| 4326 |
int64_t Avg = (First & Last) + ((First ^ Last) >> 1); |
4326 |
int64_t Avg = (First & Last) + ((First ^ Last) >> 1); |
| 4327 |
// If the result is negative and First is odd and Last even (or vice versa), |
4327 |
// If the result is negative and First is odd and Last even (or vice versa), |
| 4328 |
// we rounded towards -inf. Add 1 in that case, to round towards 0. |
4328 |
// we rounded towards -inf. Add 1 in that case, to round towards 0. |
| 4329 |
Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63)); |
4329 |
Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63)); |
| 4330 |
ImmMapTy::const_iterator OtherImms[] = { |
4330 |
ImmMapTy::const_iterator OtherImms[] = { |
| 4331 |
Imms.begin(), std::prev(Imms.end()), |
4331 |
Imms.begin(), std::prev(Imms.end()), |
| 4332 |
Imms.lower_bound(Avg)}; |
4332 |
Imms.lower_bound(Avg)}; |
| 4333 |
for (const auto &M : OtherImms) { |
4333 |
for (const auto &M : OtherImms) { |
| 4334 |
if (M == J || M == JE) continue; |
4334 |
if (M == J || M == JE) continue; |
| 4335 |
|
4335 |
|
| 4336 |
// Compute the difference between the two. |
4336 |
// Compute the difference between the two. |
| 4337 |
int64_t Imm = (uint64_t)JImm - M->first; |
4337 |
int64_t Imm = (uint64_t)JImm - M->first; |
| 4338 |
for (unsigned LUIdx : UsedByIndices.set_bits()) |
4338 |
for (unsigned LUIdx : UsedByIndices.set_bits()) |
| 4339 |
// Make a memo of this use, offset, and register tuple. |
4339 |
// Make a memo of this use, offset, and register tuple. |
| 4340 |
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second) |
4340 |
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second) |
| 4341 |
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); |
4341 |
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); |
| 4342 |
} |
4342 |
} |
| 4343 |
} |
4343 |
} |
| 4344 |
} |
4344 |
} |
| 4345 |
|
4345 |
|
| 4346 |
Map.clear(); |
4346 |
Map.clear(); |
| 4347 |
Sequence.clear(); |
4347 |
Sequence.clear(); |
| 4348 |
UsedByIndicesMap.clear(); |
4348 |
UsedByIndicesMap.clear(); |
| 4349 |
UniqueItems.clear(); |
4349 |
UniqueItems.clear(); |
| 4350 |
|
4350 |
|
| 4351 |
// Now iterate through the worklist and add new formulae. |
4351 |
// Now iterate through the worklist and add new formulae. |
| 4352 |
for (const WorkItem &WI : WorkItems) { |
4352 |
for (const WorkItem &WI : WorkItems) { |
| 4353 |
size_t LUIdx = WI.LUIdx; |
4353 |
size_t LUIdx = WI.LUIdx; |
| 4354 |
LSRUse &LU = Uses[LUIdx]; |
4354 |
LSRUse &LU = Uses[LUIdx]; |
| 4355 |
int64_t Imm = WI.Imm; |
4355 |
int64_t Imm = WI.Imm; |
| 4356 |
const SCEV *OrigReg = WI.OrigReg; |
4356 |
const SCEV *OrigReg = WI.OrigReg; |
| 4357 |
|
4357 |
|
| 4358 |
Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); |
4358 |
Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); |
| 4359 |
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); |
4359 |
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); |
| 4360 |
unsigned BitWidth = SE.getTypeSizeInBits(IntTy); |
4360 |
unsigned BitWidth = SE.getTypeSizeInBits(IntTy); |
| 4361 |
|
4361 |
|
| 4362 |
// TODO: Use a more targeted data structure. |
4362 |
// TODO: Use a more targeted data structure. |
| 4363 |
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { |
4363 |
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { |
| 4364 |
Formula F = LU.Formulae[L]; |
4364 |
Formula F = LU.Formulae[L]; |
| 4365 |
// FIXME: The code for the scaled and unscaled registers looks |
4365 |
// FIXME: The code for the scaled and unscaled registers looks |
| 4366 |
// very similar but slightly different. Investigate if they |
4366 |
// very similar but slightly different. Investigate if they |
| 4367 |
// could be merged. That way, we would not have to unscale the |
4367 |
// could be merged. That way, we would not have to unscale the |
| 4368 |
// Formula. |
4368 |
// Formula. |
| 4369 |
F.unscale(); |
4369 |
F.unscale(); |
| 4370 |
// Use the immediate in the scaled register. |
4370 |
// Use the immediate in the scaled register. |
| 4371 |
if (F.ScaledReg == OrigReg) { |
4371 |
if (F.ScaledReg == OrigReg) { |
| 4372 |
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; |
4372 |
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; |
| 4373 |
// Don't create 50 + reg(-50). |
4373 |
// Don't create 50 + reg(-50). |
| 4374 |
if (F.referencesReg(SE.getSCEV( |
4374 |
if (F.referencesReg(SE.getSCEV( |
| 4375 |
ConstantInt::get(IntTy, -(uint64_t)Offset)))) |
4375 |
ConstantInt::get(IntTy, -(uint64_t)Offset)))) |
| 4376 |
continue; |
4376 |
continue; |
| 4377 |
Formula NewF = F; |
4377 |
Formula NewF = F; |
| 4378 |
NewF.BaseOffset = Offset; |
4378 |
NewF.BaseOffset = Offset; |
| 4379 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, |
4379 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, |
| 4380 |
NewF)) |
4380 |
NewF)) |
| 4381 |
continue; |
4381 |
continue; |
| 4382 |
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); |
4382 |
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); |
| 4383 |
|
4383 |
|
| 4384 |
// If the new scale is a constant in a register, and adding the constant |
4384 |
// If the new scale is a constant in a register, and adding the constant |
| 4385 |
// value to the immediate would produce a value closer to zero than the |
4385 |
// value to the immediate would produce a value closer to zero than the |
| 4386 |
// immediate itself, then the formula isn't worthwhile. |
4386 |
// immediate itself, then the formula isn't worthwhile. |
| 4387 |
if (const SCEVConstant *C = dyn_cast(NewF.ScaledReg)) |
4387 |
if (const SCEVConstant *C = dyn_cast(NewF.ScaledReg)) |
| 4388 |
if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) && |
4388 |
if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) && |
| 4389 |
(C->getAPInt().abs() * APInt(BitWidth, F.Scale)) |
4389 |
(C->getAPInt().abs() * APInt(BitWidth, F.Scale)) |
| 4390 |
.ule(std::abs(NewF.BaseOffset))) |
4390 |
.ule(std::abs(NewF.BaseOffset))) |
| 4391 |
continue; |
4391 |
continue; |
| 4392 |
|
4392 |
|
| 4393 |
// OK, looks good. |
4393 |
// OK, looks good. |
| 4394 |
NewF.canonicalize(*this->L); |
4394 |
NewF.canonicalize(*this->L); |
| 4395 |
(void)InsertFormula(LU, LUIdx, NewF); |
4395 |
(void)InsertFormula(LU, LUIdx, NewF); |
| 4396 |
} else { |
4396 |
} else { |
| 4397 |
// Use the immediate in a base register. |
4397 |
// Use the immediate in a base register. |
| 4398 |
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) { |
4398 |
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) { |
| 4399 |
const SCEV *BaseReg = F.BaseRegs[N]; |
4399 |
const SCEV *BaseReg = F.BaseRegs[N]; |
| 4400 |
if (BaseReg != OrigReg) |
4400 |
if (BaseReg != OrigReg) |
| 4401 |
continue; |
4401 |
continue; |
| 4402 |
Formula NewF = F; |
4402 |
Formula NewF = F; |
| 4403 |
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; |
4403 |
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; |
| 4404 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, |
4404 |
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, |
| 4405 |
LU.Kind, LU.AccessTy, NewF)) { |
4405 |
LU.Kind, LU.AccessTy, NewF)) { |
| 4406 |
if (AMK == TTI::AMK_PostIndexed && |
4406 |
if (AMK == TTI::AMK_PostIndexed && |
| 4407 |
mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE)) |
4407 |
mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE)) |
| 4408 |
continue; |
4408 |
continue; |
| 4409 |
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) |
4409 |
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) |
| 4410 |
continue; |
4410 |
continue; |
| 4411 |
NewF = F; |
4411 |
NewF = F; |
| 4412 |
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; |
4412 |
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; |
| 4413 |
} |
4413 |
} |
| 4414 |
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); |
4414 |
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); |
| 4415 |
|
4415 |
|
| 4416 |
// If the new formula has a constant in a register, and adding the |
4416 |
// If the new formula has a constant in a register, and adding the |
| 4417 |
// constant value to the immediate would produce a value closer to |
4417 |
// constant value to the immediate would produce a value closer to |
| 4418 |
// zero than the immediate itself, then the formula isn't worthwhile. |
4418 |
// zero than the immediate itself, then the formula isn't worthwhile. |
| 4419 |
for (const SCEV *NewReg : NewF.BaseRegs) |
4419 |
for (const SCEV *NewReg : NewF.BaseRegs) |
| 4420 |
if (const SCEVConstant *C = dyn_cast(NewReg)) |
4420 |
if (const SCEVConstant *C = dyn_cast(NewReg)) |
| 4421 |
if ((C->getAPInt() + NewF.BaseOffset) |
4421 |
if ((C->getAPInt() + NewF.BaseOffset) |
| 4422 |
.abs() |
4422 |
.abs() |
| 4423 |
.slt(std::abs(NewF.BaseOffset)) && |
4423 |
.slt(std::abs(NewF.BaseOffset)) && |
| 4424 |
(C->getAPInt() + NewF.BaseOffset).countr_zero() >= |
4424 |
(C->getAPInt() + NewF.BaseOffset).countr_zero() >= |
| 4425 |
(unsigned)llvm::countr_zero(NewF.BaseOffset)) |
4425 |
(unsigned)llvm::countr_zero(NewF.BaseOffset)) |
| 4426 |
goto skip_formula; |
4426 |
goto skip_formula; |
| 4427 |
|
4427 |
|
| 4428 |
// Ok, looks good. |
4428 |
// Ok, looks good. |
| 4429 |
NewF.canonicalize(*this->L); |
4429 |
NewF.canonicalize(*this->L); |
| 4430 |
(void)InsertFormula(LU, LUIdx, NewF); |
4430 |
(void)InsertFormula(LU, LUIdx, NewF); |
| 4431 |
break; |
4431 |
break; |
| 4432 |
skip_formula:; |
4432 |
skip_formula:; |
| 4433 |
} |
4433 |
} |
| 4434 |
} |
4434 |
} |
| 4435 |
} |
4435 |
} |
| 4436 |
} |
4436 |
} |
| 4437 |
} |
4437 |
} |
| 4438 |
|
4438 |
|
| 4439 |
/// Generate formulae for each use. |
4439 |
/// Generate formulae for each use. |
| 4440 |
void |
4440 |
void |
| 4441 |
LSRInstance::GenerateAllReuseFormulae() { |
4441 |
LSRInstance::GenerateAllReuseFormulae() { |
| 4442 |
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan |
4442 |
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan |
| 4443 |
// queries are more precise. |
4443 |
// queries are more precise. |
| 4444 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4444 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4445 |
LSRUse &LU = Uses[LUIdx]; |
4445 |
LSRUse &LU = Uses[LUIdx]; |
| 4446 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4446 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4447 |
GenerateReassociations(LU, LUIdx, LU.Formulae[i]); |
4447 |
GenerateReassociations(LU, LUIdx, LU.Formulae[i]); |
| 4448 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4448 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4449 |
GenerateCombinations(LU, LUIdx, LU.Formulae[i]); |
4449 |
GenerateCombinations(LU, LUIdx, LU.Formulae[i]); |
| 4450 |
} |
4450 |
} |
| 4451 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4451 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4452 |
LSRUse &LU = Uses[LUIdx]; |
4452 |
LSRUse &LU = Uses[LUIdx]; |
| 4453 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4453 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4454 |
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]); |
4454 |
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]); |
| 4455 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4455 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4456 |
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]); |
4456 |
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]); |
| 4457 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4457 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4458 |
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]); |
4458 |
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]); |
| 4459 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4459 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4460 |
GenerateScales(LU, LUIdx, LU.Formulae[i]); |
4460 |
GenerateScales(LU, LUIdx, LU.Formulae[i]); |
| 4461 |
} |
4461 |
} |
| 4462 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4462 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4463 |
LSRUse &LU = Uses[LUIdx]; |
4463 |
LSRUse &LU = Uses[LUIdx]; |
| 4464 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
4464 |
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) |
| 4465 |
GenerateTruncates(LU, LUIdx, LU.Formulae[i]); |
4465 |
GenerateTruncates(LU, LUIdx, LU.Formulae[i]); |
| 4466 |
} |
4466 |
} |
| 4467 |
|
4467 |
|
| 4468 |
GenerateCrossUseConstantOffsets(); |
4468 |
GenerateCrossUseConstantOffsets(); |
| 4469 |
|
4469 |
|
| 4470 |
LLVM_DEBUG(dbgs() << "\n" |
4470 |
LLVM_DEBUG(dbgs() << "\n" |
| 4471 |
"After generating reuse formulae:\n"; |
4471 |
"After generating reuse formulae:\n"; |
| 4472 |
print_uses(dbgs())); |
4472 |
print_uses(dbgs())); |
| 4473 |
} |
4473 |
} |
| 4474 |
|
4474 |
|
| 4475 |
/// If there are multiple formulae with the same set of registers used |
4475 |
/// If there are multiple formulae with the same set of registers used |
| 4476 |
/// by other uses, pick the best one and delete the others. |
4476 |
/// by other uses, pick the best one and delete the others. |
| 4477 |
void LSRInstance::FilterOutUndesirableDedicatedRegisters() { |
4477 |
void LSRInstance::FilterOutUndesirableDedicatedRegisters() { |
| 4478 |
DenseSet VisitedRegs; |
4478 |
DenseSet VisitedRegs; |
| 4479 |
SmallPtrSet Regs; |
4479 |
SmallPtrSet Regs; |
| 4480 |
SmallPtrSet LoserRegs; |
4480 |
SmallPtrSet LoserRegs; |
| 4481 |
#ifndef NDEBUG |
4481 |
#ifndef NDEBUG |
| 4482 |
bool ChangedFormulae = false; |
4482 |
bool ChangedFormulae = false; |
| 4483 |
#endif |
4483 |
#endif |
| 4484 |
|
4484 |
|
| 4485 |
// Collect the best formula for each unique set of shared registers. This |
4485 |
// Collect the best formula for each unique set of shared registers. This |
| 4486 |
// is reset for each use. |
4486 |
// is reset for each use. |
| 4487 |
using BestFormulaeTy = |
4487 |
using BestFormulaeTy = |
| 4488 |
DenseMap, size_t, UniquifierDenseMapInfo>; |
4488 |
DenseMap, size_t, UniquifierDenseMapInfo>; |
| 4489 |
|
4489 |
|
| 4490 |
BestFormulaeTy BestFormulae; |
4490 |
BestFormulaeTy BestFormulae; |
| 4491 |
|
4491 |
|
| 4492 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4492 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4493 |
LSRUse &LU = Uses[LUIdx]; |
4493 |
LSRUse &LU = Uses[LUIdx]; |
| 4494 |
LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); |
4494 |
LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); |
| 4495 |
dbgs() << '\n'); |
4495 |
dbgs() << '\n'); |
| 4496 |
|
4496 |
|
| 4497 |
bool Any = false; |
4497 |
bool Any = false; |
| 4498 |
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); |
4498 |
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); |
| 4499 |
FIdx != NumForms; ++FIdx) { |
4499 |
FIdx != NumForms; ++FIdx) { |
| 4500 |
Formula &F = LU.Formulae[FIdx]; |
4500 |
Formula &F = LU.Formulae[FIdx]; |
| 4501 |
|
4501 |
|
| 4502 |
// Some formulas are instant losers. For example, they may depend on |
4502 |
// Some formulas are instant losers. For example, they may depend on |
| 4503 |
// nonexistent AddRecs from other loops. These need to be filtered |
4503 |
// nonexistent AddRecs from other loops. These need to be filtered |
| 4504 |
// immediately, otherwise heuristics could choose them over others leading |
4504 |
// immediately, otherwise heuristics could choose them over others leading |
| 4505 |
// to an unsatisfactory solution. Passing LoserRegs into RateFormula here |
4505 |
// to an unsatisfactory solution. Passing LoserRegs into RateFormula here |
| 4506 |
// avoids the need to recompute this information across formulae using the |
4506 |
// avoids the need to recompute this information across formulae using the |
| 4507 |
// same bad AddRec. Passing LoserRegs is also essential unless we remove |
4507 |
// same bad AddRec. Passing LoserRegs is also essential unless we remove |
| 4508 |
// the corresponding bad register from the Regs set. |
4508 |
// the corresponding bad register from the Regs set. |
| 4509 |
Cost CostF(L, SE, TTI, AMK); |
4509 |
Cost CostF(L, SE, TTI, AMK); |
| 4510 |
Regs.clear(); |
4510 |
Regs.clear(); |
| 4511 |
CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs); |
4511 |
CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs); |
| 4512 |
if (CostF.isLoser()) { |
4512 |
if (CostF.isLoser()) { |
| 4513 |
// During initial formula generation, undesirable formulae are generated |
4513 |
// During initial formula generation, undesirable formulae are generated |
| 4514 |
// by uses within other loops that have some non-trivial address mode or |
4514 |
// by uses within other loops that have some non-trivial address mode or |
| 4515 |
// use the postinc form of the IV. LSR needs to provide these formulae |
4515 |
// use the postinc form of the IV. LSR needs to provide these formulae |
| 4516 |
// as the basis of rediscovering the desired formula that uses an AddRec |
4516 |
// as the basis of rediscovering the desired formula that uses an AddRec |
| 4517 |
// corresponding to the existing phi. Once all formulae have been |
4517 |
// corresponding to the existing phi. Once all formulae have been |
| 4518 |
// generated, these initial losers may be pruned. |
4518 |
// generated, these initial losers may be pruned. |
| 4519 |
LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs()); |
4519 |
LLVM_DEBUG(dbgs() << " Filtering loser "; F.print(dbgs()); |
| 4520 |
dbgs() << "\n"); |
4520 |
dbgs() << "\n"); |
| 4521 |
} |
4521 |
} |
| 4522 |
else { |
4522 |
else { |
| 4523 |
SmallVector Key; |
4523 |
SmallVector Key; |
| 4524 |
for (const SCEV *Reg : F.BaseRegs) { |
4524 |
for (const SCEV *Reg : F.BaseRegs) { |
| 4525 |
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) |
4525 |
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) |
| 4526 |
Key.push_back(Reg); |
4526 |
Key.push_back(Reg); |
| 4527 |
} |
4527 |
} |
| 4528 |
if (F.ScaledReg && |
4528 |
if (F.ScaledReg && |
| 4529 |
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) |
4529 |
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) |
| 4530 |
Key.push_back(F.ScaledReg); |
4530 |
Key.push_back(F.ScaledReg); |
| 4531 |
// Unstable sort by host order ok, because this is only used for |
4531 |
// Unstable sort by host order ok, because this is only used for |
| 4532 |
// uniquifying. |
4532 |
// uniquifying. |
| 4533 |
llvm::sort(Key); |
4533 |
llvm::sort(Key); |
| 4534 |
|
4534 |
|
| 4535 |
std::pair P = |
4535 |
std::pair P = |
| 4536 |
BestFormulae.insert(std::make_pair(Key, FIdx)); |
4536 |
BestFormulae.insert(std::make_pair(Key, FIdx)); |
| 4537 |
if (P.second) |
4537 |
if (P.second) |
| 4538 |
continue; |
4538 |
continue; |
| 4539 |
|
4539 |
|
| 4540 |
Formula &Best = LU.Formulae[P.first->second]; |
4540 |
Formula &Best = LU.Formulae[P.first->second]; |
| 4541 |
|
4541 |
|
| 4542 |
Cost CostBest(L, SE, TTI, AMK); |
4542 |
Cost CostBest(L, SE, TTI, AMK); |
| 4543 |
Regs.clear(); |
4543 |
Regs.clear(); |
| 4544 |
CostBest.RateFormula(Best, Regs, VisitedRegs, LU); |
4544 |
CostBest.RateFormula(Best, Regs, VisitedRegs, LU); |
| 4545 |
if (CostF.isLess(CostBest)) |
4545 |
if (CostF.isLess(CostBest)) |
| 4546 |
std::swap(F, Best); |
4546 |
std::swap(F, Best); |
| 4547 |
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); |
4547 |
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); |
| 4548 |
dbgs() << "\n" |
4548 |
dbgs() << "\n" |
| 4549 |
" in favor of formula "; |
4549 |
" in favor of formula "; |
| 4550 |
Best.print(dbgs()); dbgs() << '\n'); |
4550 |
Best.print(dbgs()); dbgs() << '\n'); |
| 4551 |
} |
4551 |
} |
| 4552 |
#ifndef NDEBUG |
4552 |
#ifndef NDEBUG |
| 4553 |
ChangedFormulae = true; |
4553 |
ChangedFormulae = true; |
| 4554 |
#endif |
4554 |
#endif |
| 4555 |
LU.DeleteFormula(F); |
4555 |
LU.DeleteFormula(F); |
| 4556 |
--FIdx; |
4556 |
--FIdx; |
| 4557 |
--NumForms; |
4557 |
--NumForms; |
| 4558 |
Any = true; |
4558 |
Any = true; |
| 4559 |
} |
4559 |
} |
| 4560 |
|
4560 |
|
| 4561 |
// Now that we've filtered out some formulae, recompute the Regs set. |
4561 |
// Now that we've filtered out some formulae, recompute the Regs set. |
| 4562 |
if (Any) |
4562 |
if (Any) |
| 4563 |
LU.RecomputeRegs(LUIdx, RegUses); |
4563 |
LU.RecomputeRegs(LUIdx, RegUses); |
| 4564 |
|
4564 |
|
| 4565 |
// Reset this to prepare for the next use. |
4565 |
// Reset this to prepare for the next use. |
| 4566 |
BestFormulae.clear(); |
4566 |
BestFormulae.clear(); |
| 4567 |
} |
4567 |
} |
| 4568 |
|
4568 |
|
| 4569 |
LLVM_DEBUG(if (ChangedFormulae) { |
4569 |
LLVM_DEBUG(if (ChangedFormulae) { |
| 4570 |
dbgs() << "\n" |
4570 |
dbgs() << "\n" |
| 4571 |
"After filtering out undesirable candidates:\n"; |
4571 |
"After filtering out undesirable candidates:\n"; |
| 4572 |
print_uses(dbgs()); |
4572 |
print_uses(dbgs()); |
| 4573 |
}); |
4573 |
}); |
| 4574 |
} |
4574 |
} |
| 4575 |
|
4575 |
|
| 4576 |
/// Estimate the worst-case number of solutions the solver might have to |
4576 |
/// Estimate the worst-case number of solutions the solver might have to |
| 4577 |
/// consider. It almost never considers this many solutions because it prune the |
4577 |
/// consider. It almost never considers this many solutions because it prune the |
| 4578 |
/// search space, but the pruning isn't always sufficient. |
4578 |
/// search space, but the pruning isn't always sufficient. |
| 4579 |
size_t LSRInstance::EstimateSearchSpaceComplexity() const { |
4579 |
size_t LSRInstance::EstimateSearchSpaceComplexity() const { |
| 4580 |
size_t Power = 1; |
4580 |
size_t Power = 1; |
| 4581 |
for (const LSRUse &LU : Uses) { |
4581 |
for (const LSRUse &LU : Uses) { |
| 4582 |
size_t FSize = LU.Formulae.size(); |
4582 |
size_t FSize = LU.Formulae.size(); |
| 4583 |
if (FSize >= ComplexityLimit) { |
4583 |
if (FSize >= ComplexityLimit) { |
| 4584 |
Power = ComplexityLimit; |
4584 |
Power = ComplexityLimit; |
| 4585 |
break; |
4585 |
break; |
| 4586 |
} |
4586 |
} |
| 4587 |
Power *= FSize; |
4587 |
Power *= FSize; |
| 4588 |
if (Power >= ComplexityLimit) |
4588 |
if (Power >= ComplexityLimit) |
| 4589 |
break; |
4589 |
break; |
| 4590 |
} |
4590 |
} |
| 4591 |
return Power; |
4591 |
return Power; |
| 4592 |
} |
4592 |
} |
| 4593 |
|
4593 |
|
| 4594 |
/// When one formula uses a superset of the registers of another formula, it |
4594 |
/// When one formula uses a superset of the registers of another formula, it |
| 4595 |
/// won't help reduce register pressure (though it may not necessarily hurt |
4595 |
/// won't help reduce register pressure (though it may not necessarily hurt |
| 4596 |
/// register pressure); remove it to simplify the system. |
4596 |
/// register pressure); remove it to simplify the system. |
| 4597 |
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { |
4597 |
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { |
| 4598 |
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { |
4598 |
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { |
| 4599 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
4599 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
| 4600 |
|
4600 |
|
| 4601 |
LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae " |
4601 |
LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae " |
| 4602 |
"which use a superset of registers used by other " |
4602 |
"which use a superset of registers used by other " |
| 4603 |
"formulae.\n"); |
4603 |
"formulae.\n"); |
| 4604 |
|
4604 |
|
| 4605 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4605 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4606 |
LSRUse &LU = Uses[LUIdx]; |
4606 |
LSRUse &LU = Uses[LUIdx]; |
| 4607 |
bool Any = false; |
4607 |
bool Any = false; |
| 4608 |
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { |
4608 |
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { |
| 4609 |
Formula &F = LU.Formulae[i]; |
4609 |
Formula &F = LU.Formulae[i]; |
| 4610 |
// Look for a formula with a constant or GV in a register. If the use |
4610 |
// Look for a formula with a constant or GV in a register. If the use |
| 4611 |
// also has a formula with that same value in an immediate field, |
4611 |
// also has a formula with that same value in an immediate field, |
| 4612 |
// delete the one that uses a register. |
4612 |
// delete the one that uses a register. |
| 4613 |
for (SmallVectorImpl::const_iterator |
4613 |
for (SmallVectorImpl::const_iterator |
| 4614 |
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { |
4614 |
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { |
| 4615 |
if (const SCEVConstant *C = dyn_cast(*I)) { |
4615 |
if (const SCEVConstant *C = dyn_cast(*I)) { |
| 4616 |
Formula NewF = F; |
4616 |
Formula NewF = F; |
| 4617 |
//FIXME: Formulas should store bitwidth to do wrapping properly. |
4617 |
//FIXME: Formulas should store bitwidth to do wrapping properly. |
| 4618 |
// See PR41034. |
4618 |
// See PR41034. |
| 4619 |
NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue(); |
4619 |
NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue(); |
| 4620 |
NewF.BaseRegs.erase(NewF.BaseRegs.begin() + |
4620 |
NewF.BaseRegs.erase(NewF.BaseRegs.begin() + |
| 4621 |
(I - F.BaseRegs.begin())); |
4621 |
(I - F.BaseRegs.begin())); |
| 4622 |
if (LU.HasFormulaWithSameRegs(NewF)) { |
4622 |
if (LU.HasFormulaWithSameRegs(NewF)) { |
| 4623 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); |
4623 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); |
| 4624 |
dbgs() << '\n'); |
4624 |
dbgs() << '\n'); |
| 4625 |
LU.DeleteFormula(F); |
4625 |
LU.DeleteFormula(F); |
| 4626 |
--i; |
4626 |
--i; |
| 4627 |
--e; |
4627 |
--e; |
| 4628 |
Any = true; |
4628 |
Any = true; |
| 4629 |
break; |
4629 |
break; |
| 4630 |
} |
4630 |
} |
| 4631 |
} else if (const SCEVUnknown *U = dyn_cast(*I)) { |
4631 |
} else if (const SCEVUnknown *U = dyn_cast(*I)) { |
| 4632 |
if (GlobalValue *GV = dyn_cast(U->getValue())) |
4632 |
if (GlobalValue *GV = dyn_cast(U->getValue())) |
| 4633 |
if (!F.BaseGV) { |
4633 |
if (!F.BaseGV) { |
| 4634 |
Formula NewF = F; |
4634 |
Formula NewF = F; |
| 4635 |
NewF.BaseGV = GV; |
4635 |
NewF.BaseGV = GV; |
| 4636 |
NewF.BaseRegs.erase(NewF.BaseRegs.begin() + |
4636 |
NewF.BaseRegs.erase(NewF.BaseRegs.begin() + |
| 4637 |
(I - F.BaseRegs.begin())); |
4637 |
(I - F.BaseRegs.begin())); |
| 4638 |
if (LU.HasFormulaWithSameRegs(NewF)) { |
4638 |
if (LU.HasFormulaWithSameRegs(NewF)) { |
| 4639 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); |
4639 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); |
| 4640 |
dbgs() << '\n'); |
4640 |
dbgs() << '\n'); |
| 4641 |
LU.DeleteFormula(F); |
4641 |
LU.DeleteFormula(F); |
| 4642 |
--i; |
4642 |
--i; |
| 4643 |
--e; |
4643 |
--e; |
| 4644 |
Any = true; |
4644 |
Any = true; |
| 4645 |
break; |
4645 |
break; |
| 4646 |
} |
4646 |
} |
| 4647 |
} |
4647 |
} |
| 4648 |
} |
4648 |
} |
| 4649 |
} |
4649 |
} |
| 4650 |
} |
4650 |
} |
| 4651 |
if (Any) |
4651 |
if (Any) |
| 4652 |
LU.RecomputeRegs(LUIdx, RegUses); |
4652 |
LU.RecomputeRegs(LUIdx, RegUses); |
| 4653 |
} |
4653 |
} |
| 4654 |
|
4654 |
|
| 4655 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
4655 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
| 4656 |
} |
4656 |
} |
| 4657 |
} |
4657 |
} |
| 4658 |
|
4658 |
|
| 4659 |
/// When there are many registers for expressions like A, A+1, A+2, etc., |
4659 |
/// When there are many registers for expressions like A, A+1, A+2, etc., |
| 4660 |
/// allocate a single register for them. |
4660 |
/// allocate a single register for them. |
| 4661 |
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { |
4661 |
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { |
| 4662 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
4662 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
| 4663 |
return; |
4663 |
return; |
| 4664 |
|
4664 |
|
| 4665 |
LLVM_DEBUG( |
4665 |
LLVM_DEBUG( |
| 4666 |
dbgs() << "The search space is too complex.\n" |
4666 |
dbgs() << "The search space is too complex.\n" |
| 4667 |
"Narrowing the search space by assuming that uses separated " |
4667 |
"Narrowing the search space by assuming that uses separated " |
| 4668 |
"by a constant offset will use the same registers.\n"); |
4668 |
"by a constant offset will use the same registers.\n"); |
| 4669 |
|
4669 |
|
| 4670 |
// This is especially useful for unrolled loops. |
4670 |
// This is especially useful for unrolled loops. |
| 4671 |
|
4671 |
|
| 4672 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4672 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4673 |
LSRUse &LU = Uses[LUIdx]; |
4673 |
LSRUse &LU = Uses[LUIdx]; |
| 4674 |
for (const Formula &F : LU.Formulae) { |
4674 |
for (const Formula &F : LU.Formulae) { |
| 4675 |
if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1)) |
4675 |
if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1)) |
| 4676 |
continue; |
4676 |
continue; |
| 4677 |
|
4677 |
|
| 4678 |
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); |
4678 |
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); |
| 4679 |
if (!LUThatHas) |
4679 |
if (!LUThatHas) |
| 4680 |
continue; |
4680 |
continue; |
| 4681 |
|
4681 |
|
| 4682 |
if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, |
4682 |
if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, |
| 4683 |
LU.Kind, LU.AccessTy)) |
4683 |
LU.Kind, LU.AccessTy)) |
| 4684 |
continue; |
4684 |
continue; |
| 4685 |
|
4685 |
|
| 4686 |
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); |
4686 |
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); |
| 4687 |
|
4687 |
|
| 4688 |
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; |
4688 |
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; |
| 4689 |
|
4689 |
|
| 4690 |
// Transfer the fixups of LU to LUThatHas. |
4690 |
// Transfer the fixups of LU to LUThatHas. |
| 4691 |
for (LSRFixup &Fixup : LU.Fixups) { |
4691 |
for (LSRFixup &Fixup : LU.Fixups) { |
| 4692 |
Fixup.Offset += F.BaseOffset; |
4692 |
Fixup.Offset += F.BaseOffset; |
| 4693 |
LUThatHas->pushFixup(Fixup); |
4693 |
LUThatHas->pushFixup(Fixup); |
| 4694 |
LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); |
4694 |
LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); |
| 4695 |
} |
4695 |
} |
| 4696 |
|
4696 |
|
| 4697 |
// Delete formulae from the new use which are no longer legal. |
4697 |
// Delete formulae from the new use which are no longer legal. |
| 4698 |
bool Any = false; |
4698 |
bool Any = false; |
| 4699 |
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { |
4699 |
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { |
| 4700 |
Formula &F = LUThatHas->Formulae[i]; |
4700 |
Formula &F = LUThatHas->Formulae[i]; |
| 4701 |
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, |
4701 |
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, |
| 4702 |
LUThatHas->Kind, LUThatHas->AccessTy, F)) { |
4702 |
LUThatHas->Kind, LUThatHas->AccessTy, F)) { |
| 4703 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); |
4703 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); |
| 4704 |
LUThatHas->DeleteFormula(F); |
4704 |
LUThatHas->DeleteFormula(F); |
| 4705 |
--i; |
4705 |
--i; |
| 4706 |
--e; |
4706 |
--e; |
| 4707 |
Any = true; |
4707 |
Any = true; |
| 4708 |
} |
4708 |
} |
| 4709 |
} |
4709 |
} |
| 4710 |
|
4710 |
|
| 4711 |
if (Any) |
4711 |
if (Any) |
| 4712 |
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); |
4712 |
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); |
| 4713 |
|
4713 |
|
| 4714 |
// Delete the old use. |
4714 |
// Delete the old use. |
| 4715 |
DeleteUse(LU, LUIdx); |
4715 |
DeleteUse(LU, LUIdx); |
| 4716 |
--LUIdx; |
4716 |
--LUIdx; |
| 4717 |
--NumUses; |
4717 |
--NumUses; |
| 4718 |
break; |
4718 |
break; |
| 4719 |
} |
4719 |
} |
| 4720 |
} |
4720 |
} |
| 4721 |
|
4721 |
|
| 4722 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
4722 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
| 4723 |
} |
4723 |
} |
| 4724 |
|
4724 |
|
| 4725 |
/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that |
4725 |
/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that |
| 4726 |
/// we've done more filtering, as it may be able to find more formulae to |
4726 |
/// we've done more filtering, as it may be able to find more formulae to |
| 4727 |
/// eliminate. |
4727 |
/// eliminate. |
| 4728 |
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ |
4728 |
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ |
| 4729 |
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { |
4729 |
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { |
| 4730 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
4730 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
| 4731 |
|
4731 |
|
| 4732 |
LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out " |
4732 |
LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out " |
| 4733 |
"undesirable dedicated registers.\n"); |
4733 |
"undesirable dedicated registers.\n"); |
| 4734 |
|
4734 |
|
| 4735 |
FilterOutUndesirableDedicatedRegisters(); |
4735 |
FilterOutUndesirableDedicatedRegisters(); |
| 4736 |
|
4736 |
|
| 4737 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
4737 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
| 4738 |
} |
4738 |
} |
| 4739 |
} |
4739 |
} |
| 4740 |
|
4740 |
|
| 4741 |
/// If a LSRUse has multiple formulae with the same ScaledReg and Scale. |
4741 |
/// If a LSRUse has multiple formulae with the same ScaledReg and Scale. |
| 4742 |
/// Pick the best one and delete the others. |
4742 |
/// Pick the best one and delete the others. |
| 4743 |
/// This narrowing heuristic is to keep as many formulae with different |
4743 |
/// This narrowing heuristic is to keep as many formulae with different |
| 4744 |
/// Scale and ScaledReg pair as possible while narrowing the search space. |
4744 |
/// Scale and ScaledReg pair as possible while narrowing the search space. |
| 4745 |
/// The benefit is that it is more likely to find out a better solution |
4745 |
/// The benefit is that it is more likely to find out a better solution |
| 4746 |
/// from a formulae set with more Scale and ScaledReg variations than |
4746 |
/// from a formulae set with more Scale and ScaledReg variations than |
| 4747 |
/// a formulae set with the same Scale and ScaledReg. The picking winner |
4747 |
/// a formulae set with the same Scale and ScaledReg. The picking winner |
| 4748 |
/// reg heuristic will often keep the formulae with the same Scale and |
4748 |
/// reg heuristic will often keep the formulae with the same Scale and |
| 4749 |
/// ScaledReg and filter others, and we want to avoid that if possible. |
4749 |
/// ScaledReg and filter others, and we want to avoid that if possible. |
| 4750 |
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { |
4750 |
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { |
| 4751 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
4751 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
| 4752 |
return; |
4752 |
return; |
| 4753 |
|
4753 |
|
| 4754 |
LLVM_DEBUG( |
4754 |
LLVM_DEBUG( |
| 4755 |
dbgs() << "The search space is too complex.\n" |
4755 |
dbgs() << "The search space is too complex.\n" |
| 4756 |
"Narrowing the search space by choosing the best Formula " |
4756 |
"Narrowing the search space by choosing the best Formula " |
| 4757 |
"from the Formulae with the same Scale and ScaledReg.\n"); |
4757 |
"from the Formulae with the same Scale and ScaledReg.\n"); |
| 4758 |
|
4758 |
|
| 4759 |
// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. |
4759 |
// Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. |
| 4760 |
using BestFormulaeTy = DenseMap, size_t>; |
4760 |
using BestFormulaeTy = DenseMap, size_t>; |
| 4761 |
|
4761 |
|
| 4762 |
BestFormulaeTy BestFormulae; |
4762 |
BestFormulaeTy BestFormulae; |
| 4763 |
#ifndef NDEBUG |
4763 |
#ifndef NDEBUG |
| 4764 |
bool ChangedFormulae = false; |
4764 |
bool ChangedFormulae = false; |
| 4765 |
#endif |
4765 |
#endif |
| 4766 |
DenseSet VisitedRegs; |
4766 |
DenseSet VisitedRegs; |
| 4767 |
SmallPtrSet Regs; |
4767 |
SmallPtrSet Regs; |
| 4768 |
|
4768 |
|
| 4769 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4769 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4770 |
LSRUse &LU = Uses[LUIdx]; |
4770 |
LSRUse &LU = Uses[LUIdx]; |
| 4771 |
LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); |
4771 |
LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); |
| 4772 |
dbgs() << '\n'); |
4772 |
dbgs() << '\n'); |
| 4773 |
|
4773 |
|
| 4774 |
// Return true if Formula FA is better than Formula FB. |
4774 |
// Return true if Formula FA is better than Formula FB. |
| 4775 |
auto IsBetterThan = [&](Formula &FA, Formula &FB) { |
4775 |
auto IsBetterThan = [&](Formula &FA, Formula &FB) { |
| 4776 |
// First we will try to choose the Formula with fewer new registers. |
4776 |
// First we will try to choose the Formula with fewer new registers. |
| 4777 |
// For a register used by current Formula, the more the register is |
4777 |
// For a register used by current Formula, the more the register is |
| 4778 |
// shared among LSRUses, the less we increase the register number |
4778 |
// shared among LSRUses, the less we increase the register number |
| 4779 |
// counter of the formula. |
4779 |
// counter of the formula. |
| 4780 |
size_t FARegNum = 0; |
4780 |
size_t FARegNum = 0; |
| 4781 |
for (const SCEV *Reg : FA.BaseRegs) { |
4781 |
for (const SCEV *Reg : FA.BaseRegs) { |
| 4782 |
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); |
4782 |
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); |
| 4783 |
FARegNum += (NumUses - UsedByIndices.count() + 1); |
4783 |
FARegNum += (NumUses - UsedByIndices.count() + 1); |
| 4784 |
} |
4784 |
} |
| 4785 |
size_t FBRegNum = 0; |
4785 |
size_t FBRegNum = 0; |
| 4786 |
for (const SCEV *Reg : FB.BaseRegs) { |
4786 |
for (const SCEV *Reg : FB.BaseRegs) { |
| 4787 |
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); |
4787 |
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); |
| 4788 |
FBRegNum += (NumUses - UsedByIndices.count() + 1); |
4788 |
FBRegNum += (NumUses - UsedByIndices.count() + 1); |
| 4789 |
} |
4789 |
} |
| 4790 |
if (FARegNum != FBRegNum) |
4790 |
if (FARegNum != FBRegNum) |
| 4791 |
return FARegNum < FBRegNum; |
4791 |
return FARegNum < FBRegNum; |
| 4792 |
|
4792 |
|
| 4793 |
// If the new register numbers are the same, choose the Formula with |
4793 |
// If the new register numbers are the same, choose the Formula with |
| 4794 |
// less Cost. |
4794 |
// less Cost. |
| 4795 |
Cost CostFA(L, SE, TTI, AMK); |
4795 |
Cost CostFA(L, SE, TTI, AMK); |
| 4796 |
Cost CostFB(L, SE, TTI, AMK); |
4796 |
Cost CostFB(L, SE, TTI, AMK); |
| 4797 |
Regs.clear(); |
4797 |
Regs.clear(); |
| 4798 |
CostFA.RateFormula(FA, Regs, VisitedRegs, LU); |
4798 |
CostFA.RateFormula(FA, Regs, VisitedRegs, LU); |
| 4799 |
Regs.clear(); |
4799 |
Regs.clear(); |
| 4800 |
CostFB.RateFormula(FB, Regs, VisitedRegs, LU); |
4800 |
CostFB.RateFormula(FB, Regs, VisitedRegs, LU); |
| 4801 |
return CostFA.isLess(CostFB); |
4801 |
return CostFA.isLess(CostFB); |
| 4802 |
}; |
4802 |
}; |
| 4803 |
|
4803 |
|
| 4804 |
bool Any = false; |
4804 |
bool Any = false; |
| 4805 |
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; |
4805 |
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; |
| 4806 |
++FIdx) { |
4806 |
++FIdx) { |
| 4807 |
Formula &F = LU.Formulae[FIdx]; |
4807 |
Formula &F = LU.Formulae[FIdx]; |
| 4808 |
if (!F.ScaledReg) |
4808 |
if (!F.ScaledReg) |
| 4809 |
continue; |
4809 |
continue; |
| 4810 |
auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); |
4810 |
auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); |
| 4811 |
if (P.second) |
4811 |
if (P.second) |
| 4812 |
continue; |
4812 |
continue; |
| 4813 |
|
4813 |
|
| 4814 |
Formula &Best = LU.Formulae[P.first->second]; |
4814 |
Formula &Best = LU.Formulae[P.first->second]; |
| 4815 |
if (IsBetterThan(F, Best)) |
4815 |
if (IsBetterThan(F, Best)) |
| 4816 |
std::swap(F, Best); |
4816 |
std::swap(F, Best); |
| 4817 |
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); |
4817 |
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); |
| 4818 |
dbgs() << "\n" |
4818 |
dbgs() << "\n" |
| 4819 |
" in favor of formula "; |
4819 |
" in favor of formula "; |
| 4820 |
Best.print(dbgs()); dbgs() << '\n'); |
4820 |
Best.print(dbgs()); dbgs() << '\n'); |
| 4821 |
#ifndef NDEBUG |
4821 |
#ifndef NDEBUG |
| 4822 |
ChangedFormulae = true; |
4822 |
ChangedFormulae = true; |
| 4823 |
#endif |
4823 |
#endif |
| 4824 |
LU.DeleteFormula(F); |
4824 |
LU.DeleteFormula(F); |
| 4825 |
--FIdx; |
4825 |
--FIdx; |
| 4826 |
--NumForms; |
4826 |
--NumForms; |
| 4827 |
Any = true; |
4827 |
Any = true; |
| 4828 |
} |
4828 |
} |
| 4829 |
if (Any) |
4829 |
if (Any) |
| 4830 |
LU.RecomputeRegs(LUIdx, RegUses); |
4830 |
LU.RecomputeRegs(LUIdx, RegUses); |
| 4831 |
|
4831 |
|
| 4832 |
// Reset this to prepare for the next use. |
4832 |
// Reset this to prepare for the next use. |
| 4833 |
BestFormulae.clear(); |
4833 |
BestFormulae.clear(); |
| 4834 |
} |
4834 |
} |
| 4835 |
|
4835 |
|
| 4836 |
LLVM_DEBUG(if (ChangedFormulae) { |
4836 |
LLVM_DEBUG(if (ChangedFormulae) { |
| 4837 |
dbgs() << "\n" |
4837 |
dbgs() << "\n" |
| 4838 |
"After filtering out undesirable candidates:\n"; |
4838 |
"After filtering out undesirable candidates:\n"; |
| 4839 |
print_uses(dbgs()); |
4839 |
print_uses(dbgs()); |
| 4840 |
}); |
4840 |
}); |
| 4841 |
} |
4841 |
} |
| 4842 |
|
4842 |
|
| 4843 |
/// If we are over the complexity limit, filter out any post-inc prefering |
4843 |
/// If we are over the complexity limit, filter out any post-inc prefering |
| 4844 |
/// variables to only post-inc values. |
4844 |
/// variables to only post-inc values. |
| 4845 |
void LSRInstance::NarrowSearchSpaceByFilterPostInc() { |
4845 |
void LSRInstance::NarrowSearchSpaceByFilterPostInc() { |
| 4846 |
if (AMK != TTI::AMK_PostIndexed) |
4846 |
if (AMK != TTI::AMK_PostIndexed) |
| 4847 |
return; |
4847 |
return; |
| 4848 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
4848 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
| 4849 |
return; |
4849 |
return; |
| 4850 |
|
4850 |
|
| 4851 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n" |
4851 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n" |
| 4852 |
"Narrowing the search space by choosing the lowest " |
4852 |
"Narrowing the search space by choosing the lowest " |
| 4853 |
"register Formula for PostInc Uses.\n"); |
4853 |
"register Formula for PostInc Uses.\n"); |
| 4854 |
|
4854 |
|
| 4855 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4855 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4856 |
LSRUse &LU = Uses[LUIdx]; |
4856 |
LSRUse &LU = Uses[LUIdx]; |
| 4857 |
|
4857 |
|
| 4858 |
if (LU.Kind != LSRUse::Address) |
4858 |
if (LU.Kind != LSRUse::Address) |
| 4859 |
continue; |
4859 |
continue; |
| 4860 |
if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) && |
4860 |
if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) && |
| 4861 |
!TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType())) |
4861 |
!TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType())) |
| 4862 |
continue; |
4862 |
continue; |
| 4863 |
|
4863 |
|
| 4864 |
size_t MinRegs = std::numeric_limits::max(); |
4864 |
size_t MinRegs = std::numeric_limits::max(); |
| 4865 |
for (const Formula &F : LU.Formulae) |
4865 |
for (const Formula &F : LU.Formulae) |
| 4866 |
MinRegs = std::min(F.getNumRegs(), MinRegs); |
4866 |
MinRegs = std::min(F.getNumRegs(), MinRegs); |
| 4867 |
|
4867 |
|
| 4868 |
bool Any = false; |
4868 |
bool Any = false; |
| 4869 |
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; |
4869 |
for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; |
| 4870 |
++FIdx) { |
4870 |
++FIdx) { |
| 4871 |
Formula &F = LU.Formulae[FIdx]; |
4871 |
Formula &F = LU.Formulae[FIdx]; |
| 4872 |
if (F.getNumRegs() > MinRegs) { |
4872 |
if (F.getNumRegs() > MinRegs) { |
| 4873 |
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); |
4873 |
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); |
| 4874 |
dbgs() << "\n"); |
4874 |
dbgs() << "\n"); |
| 4875 |
LU.DeleteFormula(F); |
4875 |
LU.DeleteFormula(F); |
| 4876 |
--FIdx; |
4876 |
--FIdx; |
| 4877 |
--NumForms; |
4877 |
--NumForms; |
| 4878 |
Any = true; |
4878 |
Any = true; |
| 4879 |
} |
4879 |
} |
| 4880 |
} |
4880 |
} |
| 4881 |
if (Any) |
4881 |
if (Any) |
| 4882 |
LU.RecomputeRegs(LUIdx, RegUses); |
4882 |
LU.RecomputeRegs(LUIdx, RegUses); |
| 4883 |
|
4883 |
|
| 4884 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
4884 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
| 4885 |
break; |
4885 |
break; |
| 4886 |
} |
4886 |
} |
| 4887 |
|
4887 |
|
| 4888 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
4888 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
| 4889 |
} |
4889 |
} |
| 4890 |
|
4890 |
|
| 4891 |
/// The function delete formulas with high registers number expectation. |
4891 |
/// The function delete formulas with high registers number expectation. |
| 4892 |
/// Assuming we don't know the value of each formula (already delete |
4892 |
/// Assuming we don't know the value of each formula (already delete |
| 4893 |
/// all inefficient), generate probability of not selecting for each |
4893 |
/// all inefficient), generate probability of not selecting for each |
| 4894 |
/// register. |
4894 |
/// register. |
| 4895 |
/// For example, |
4895 |
/// For example, |
| 4896 |
/// Use1: |
4896 |
/// Use1: |
| 4897 |
/// reg(a) + reg({0,+,1}) |
4897 |
/// reg(a) + reg({0,+,1}) |
| 4898 |
/// reg(a) + reg({-1,+,1}) + 1 |
4898 |
/// reg(a) + reg({-1,+,1}) + 1 |
| 4899 |
/// reg({a,+,1}) |
4899 |
/// reg({a,+,1}) |
| 4900 |
/// Use2: |
4900 |
/// Use2: |
| 4901 |
/// reg(b) + reg({0,+,1}) |
4901 |
/// reg(b) + reg({0,+,1}) |
| 4902 |
/// reg(b) + reg({-1,+,1}) + 1 |
4902 |
/// reg(b) + reg({-1,+,1}) + 1 |
| 4903 |
/// reg({b,+,1}) |
4903 |
/// reg({b,+,1}) |
| 4904 |
/// Use3: |
4904 |
/// Use3: |
| 4905 |
/// reg(c) + reg(b) + reg({0,+,1}) |
4905 |
/// reg(c) + reg(b) + reg({0,+,1}) |
| 4906 |
/// reg(c) + reg({b,+,1}) |
4906 |
/// reg(c) + reg({b,+,1}) |
| 4907 |
/// |
4907 |
/// |
| 4908 |
/// Probability of not selecting |
4908 |
/// Probability of not selecting |
| 4909 |
/// Use1 Use2 Use3 |
4909 |
/// Use1 Use2 Use3 |
| 4910 |
/// reg(a) (1/3) * 1 * 1 |
4910 |
/// reg(a) (1/3) * 1 * 1 |
| 4911 |
/// reg(b) 1 * (1/3) * (1/2) |
4911 |
/// reg(b) 1 * (1/3) * (1/2) |
| 4912 |
/// reg({0,+,1}) (2/3) * (2/3) * (1/2) |
4912 |
/// reg({0,+,1}) (2/3) * (2/3) * (1/2) |
| 4913 |
/// reg({-1,+,1}) (2/3) * (2/3) * 1 |
4913 |
/// reg({-1,+,1}) (2/3) * (2/3) * 1 |
| 4914 |
/// reg({a,+,1}) (2/3) * 1 * 1 |
4914 |
/// reg({a,+,1}) (2/3) * 1 * 1 |
| 4915 |
/// reg({b,+,1}) 1 * (2/3) * (2/3) |
4915 |
/// reg({b,+,1}) 1 * (2/3) * (2/3) |
| 4916 |
/// reg(c) 1 * 1 * 0 |
4916 |
/// reg(c) 1 * 1 * 0 |
| 4917 |
/// |
4917 |
/// |
| 4918 |
/// Now count registers number mathematical expectation for each formula: |
4918 |
/// Now count registers number mathematical expectation for each formula: |
| 4919 |
/// Note that for each use we exclude probability if not selecting for the use. |
4919 |
/// Note that for each use we exclude probability if not selecting for the use. |
| 4920 |
/// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding |
4920 |
/// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding |
| 4921 |
/// probabilty 1/3 of not selecting for Use1). |
4921 |
/// probabilty 1/3 of not selecting for Use1). |
| 4922 |
/// Use1: |
4922 |
/// Use1: |
| 4923 |
/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted |
4923 |
/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted |
| 4924 |
/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted |
4924 |
/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted |
| 4925 |
/// reg({a,+,1}) 1 |
4925 |
/// reg({a,+,1}) 1 |
| 4926 |
/// Use2: |
4926 |
/// Use2: |
| 4927 |
/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted |
4927 |
/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted |
| 4928 |
/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted |
4928 |
/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted |
| 4929 |
/// reg({b,+,1}) 2/3 |
4929 |
/// reg({b,+,1}) 2/3 |
| 4930 |
/// Use3: |
4930 |
/// Use3: |
| 4931 |
/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted |
4931 |
/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted |
| 4932 |
/// reg(c) + reg({b,+,1}) 1 + 2/3 |
4932 |
/// reg(c) + reg({b,+,1}) 1 + 2/3 |
| 4933 |
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() { |
4933 |
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() { |
| 4934 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
4934 |
if (EstimateSearchSpaceComplexity() < ComplexityLimit) |
| 4935 |
return; |
4935 |
return; |
| 4936 |
// Ok, we have too many of formulae on our hands to conveniently handle. |
4936 |
// Ok, we have too many of formulae on our hands to conveniently handle. |
| 4937 |
// Use a rough heuristic to thin out the list. |
4937 |
// Use a rough heuristic to thin out the list. |
| 4938 |
|
4938 |
|
| 4939 |
// Set of Regs wich will be 100% used in final solution. |
4939 |
// Set of Regs wich will be 100% used in final solution. |
| 4940 |
// Used in each formula of a solution (in example above this is reg(c)). |
4940 |
// Used in each formula of a solution (in example above this is reg(c)). |
| 4941 |
// We can skip them in calculations. |
4941 |
// We can skip them in calculations. |
| 4942 |
SmallPtrSet UniqRegs; |
4942 |
SmallPtrSet UniqRegs; |
| 4943 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
4943 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
| 4944 |
|
4944 |
|
| 4945 |
// Map each register to probability of not selecting |
4945 |
// Map each register to probability of not selecting |
| 4946 |
DenseMap RegNumMap; |
4946 |
DenseMap RegNumMap; |
| 4947 |
for (const SCEV *Reg : RegUses) { |
4947 |
for (const SCEV *Reg : RegUses) { |
| 4948 |
if (UniqRegs.count(Reg)) |
4948 |
if (UniqRegs.count(Reg)) |
| 4949 |
continue; |
4949 |
continue; |
| 4950 |
float PNotSel = 1; |
4950 |
float PNotSel = 1; |
| 4951 |
for (const LSRUse &LU : Uses) { |
4951 |
for (const LSRUse &LU : Uses) { |
| 4952 |
if (!LU.Regs.count(Reg)) |
4952 |
if (!LU.Regs.count(Reg)) |
| 4953 |
continue; |
4953 |
continue; |
| 4954 |
float P = LU.getNotSelectedProbability(Reg); |
4954 |
float P = LU.getNotSelectedProbability(Reg); |
| 4955 |
if (P != 0.0) |
4955 |
if (P != 0.0) |
| 4956 |
PNotSel *= P; |
4956 |
PNotSel *= P; |
| 4957 |
else |
4957 |
else |
| 4958 |
UniqRegs.insert(Reg); |
4958 |
UniqRegs.insert(Reg); |
| 4959 |
} |
4959 |
} |
| 4960 |
RegNumMap.insert(std::make_pair(Reg, PNotSel)); |
4960 |
RegNumMap.insert(std::make_pair(Reg, PNotSel)); |
| 4961 |
} |
4961 |
} |
| 4962 |
|
4962 |
|
| 4963 |
LLVM_DEBUG( |
4963 |
LLVM_DEBUG( |
| 4964 |
dbgs() << "Narrowing the search space by deleting costly formulas\n"); |
4964 |
dbgs() << "Narrowing the search space by deleting costly formulas\n"); |
| 4965 |
|
4965 |
|
| 4966 |
// Delete formulas where registers number expectation is high. |
4966 |
// Delete formulas where registers number expectation is high. |
| 4967 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
4967 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 4968 |
LSRUse &LU = Uses[LUIdx]; |
4968 |
LSRUse &LU = Uses[LUIdx]; |
| 4969 |
// If nothing to delete - continue. |
4969 |
// If nothing to delete - continue. |
| 4970 |
if (LU.Formulae.size() < 2) |
4970 |
if (LU.Formulae.size() < 2) |
| 4971 |
continue; |
4971 |
continue; |
| 4972 |
// This is temporary solution to test performance. Float should be |
4972 |
// This is temporary solution to test performance. Float should be |
| 4973 |
// replaced with round independent type (based on integers) to avoid |
4973 |
// replaced with round independent type (based on integers) to avoid |
| 4974 |
// different results for different target builds. |
4974 |
// different results for different target builds. |
| 4975 |
float FMinRegNum = LU.Formulae[0].getNumRegs(); |
4975 |
float FMinRegNum = LU.Formulae[0].getNumRegs(); |
| 4976 |
float FMinARegNum = LU.Formulae[0].getNumRegs(); |
4976 |
float FMinARegNum = LU.Formulae[0].getNumRegs(); |
| 4977 |
size_t MinIdx = 0; |
4977 |
size_t MinIdx = 0; |
| 4978 |
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { |
4978 |
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { |
| 4979 |
Formula &F = LU.Formulae[i]; |
4979 |
Formula &F = LU.Formulae[i]; |
| 4980 |
float FRegNum = 0; |
4980 |
float FRegNum = 0; |
| 4981 |
float FARegNum = 0; |
4981 |
float FARegNum = 0; |
| 4982 |
for (const SCEV *BaseReg : F.BaseRegs) { |
4982 |
for (const SCEV *BaseReg : F.BaseRegs) { |
| 4983 |
if (UniqRegs.count(BaseReg)) |
4983 |
if (UniqRegs.count(BaseReg)) |
| 4984 |
continue; |
4984 |
continue; |
| 4985 |
FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg); |
4985 |
FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg); |
| 4986 |
if (isa(BaseReg)) |
4986 |
if (isa(BaseReg)) |
| 4987 |
FARegNum += |
4987 |
FARegNum += |
| 4988 |
RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg); |
4988 |
RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg); |
| 4989 |
} |
4989 |
} |
| 4990 |
if (const SCEV *ScaledReg = F.ScaledReg) { |
4990 |
if (const SCEV *ScaledReg = F.ScaledReg) { |
| 4991 |
if (!UniqRegs.count(ScaledReg)) { |
4991 |
if (!UniqRegs.count(ScaledReg)) { |
| 4992 |
FRegNum += |
4992 |
FRegNum += |
| 4993 |
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg); |
4993 |
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg); |
| 4994 |
if (isa(ScaledReg)) |
4994 |
if (isa(ScaledReg)) |
| 4995 |
FARegNum += |
4995 |
FARegNum += |
| 4996 |
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg); |
4996 |
RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg); |
| 4997 |
} |
4997 |
} |
| 4998 |
} |
4998 |
} |
| 4999 |
if (FMinRegNum > FRegNum || |
4999 |
if (FMinRegNum > FRegNum || |
| 5000 |
(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) { |
5000 |
(FMinRegNum == FRegNum && FMinARegNum > FARegNum)) { |
| 5001 |
FMinRegNum = FRegNum; |
5001 |
FMinRegNum = FRegNum; |
| 5002 |
FMinARegNum = FARegNum; |
5002 |
FMinARegNum = FARegNum; |
| 5003 |
MinIdx = i; |
5003 |
MinIdx = i; |
| 5004 |
} |
5004 |
} |
| 5005 |
} |
5005 |
} |
| 5006 |
LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs()); |
5006 |
LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs()); |
| 5007 |
dbgs() << " with min reg num " << FMinRegNum << '\n'); |
5007 |
dbgs() << " with min reg num " << FMinRegNum << '\n'); |
| 5008 |
if (MinIdx != 0) |
5008 |
if (MinIdx != 0) |
| 5009 |
std::swap(LU.Formulae[MinIdx], LU.Formulae[0]); |
5009 |
std::swap(LU.Formulae[MinIdx], LU.Formulae[0]); |
| 5010 |
while (LU.Formulae.size() != 1) { |
5010 |
while (LU.Formulae.size() != 1) { |
| 5011 |
LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs()); |
5011 |
LLVM_DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs()); |
| 5012 |
dbgs() << '\n'); |
5012 |
dbgs() << '\n'); |
| 5013 |
LU.Formulae.pop_back(); |
5013 |
LU.Formulae.pop_back(); |
| 5014 |
} |
5014 |
} |
| 5015 |
LU.RecomputeRegs(LUIdx, RegUses); |
5015 |
LU.RecomputeRegs(LUIdx, RegUses); |
| 5016 |
assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula"); |
5016 |
assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula"); |
| 5017 |
Formula &F = LU.Formulae[0]; |
5017 |
Formula &F = LU.Formulae[0]; |
| 5018 |
LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n'); |
5018 |
LLVM_DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n'); |
| 5019 |
// When we choose the formula, the regs become unique. |
5019 |
// When we choose the formula, the regs become unique. |
| 5020 |
UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); |
5020 |
UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); |
| 5021 |
if (F.ScaledReg) |
5021 |
if (F.ScaledReg) |
| 5022 |
UniqRegs.insert(F.ScaledReg); |
5022 |
UniqRegs.insert(F.ScaledReg); |
| 5023 |
} |
5023 |
} |
| 5024 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
5024 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
| 5025 |
} |
5025 |
} |
| 5026 |
|
5026 |
|
| 5027 |
// Check if Best and Reg are SCEVs separated by a constant amount C, and if so |
5027 |
// Check if Best and Reg are SCEVs separated by a constant amount C, and if so |
| 5028 |
// would the addressing offset +C would be legal where the negative offset -C is |
5028 |
// would the addressing offset +C would be legal where the negative offset -C is |
| 5029 |
// not. |
5029 |
// not. |
| 5030 |
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, |
5030 |
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, |
| 5031 |
ScalarEvolution &SE, const SCEV *Best, |
5031 |
ScalarEvolution &SE, const SCEV *Best, |
| 5032 |
const SCEV *Reg, |
5032 |
const SCEV *Reg, |
| 5033 |
MemAccessTy AccessType) { |
5033 |
MemAccessTy AccessType) { |
| 5034 |
if (Best->getType() != Reg->getType() || |
5034 |
if (Best->getType() != Reg->getType() || |
| 5035 |
(isa(Best) && isa(Reg) && |
5035 |
(isa(Best) && isa(Reg) && |
| 5036 |
cast(Best)->getLoop() != |
5036 |
cast(Best)->getLoop() != |
| 5037 |
cast(Reg)->getLoop())) |
5037 |
cast(Reg)->getLoop())) |
| 5038 |
return false; |
5038 |
return false; |
| 5039 |
const auto *Diff = dyn_cast(SE.getMinusSCEV(Best, Reg)); |
5039 |
const auto *Diff = dyn_cast(SE.getMinusSCEV(Best, Reg)); |
| 5040 |
if (!Diff) |
5040 |
if (!Diff) |
| 5041 |
return false; |
5041 |
return false; |
| 5042 |
|
5042 |
|
| 5043 |
return TTI.isLegalAddressingMode( |
5043 |
return TTI.isLegalAddressingMode( |
| 5044 |
AccessType.MemTy, /*BaseGV=*/nullptr, |
5044 |
AccessType.MemTy, /*BaseGV=*/nullptr, |
| 5045 |
/*BaseOffset=*/Diff->getAPInt().getSExtValue(), |
5045 |
/*BaseOffset=*/Diff->getAPInt().getSExtValue(), |
| 5046 |
/*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) && |
5046 |
/*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) && |
| 5047 |
!TTI.isLegalAddressingMode( |
5047 |
!TTI.isLegalAddressingMode( |
| 5048 |
AccessType.MemTy, /*BaseGV=*/nullptr, |
5048 |
AccessType.MemTy, /*BaseGV=*/nullptr, |
| 5049 |
/*BaseOffset=*/-Diff->getAPInt().getSExtValue(), |
5049 |
/*BaseOffset=*/-Diff->getAPInt().getSExtValue(), |
| 5050 |
/*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace); |
5050 |
/*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace); |
| 5051 |
} |
5051 |
} |
| 5052 |
|
5052 |
|
| 5053 |
/// Pick a register which seems likely to be profitable, and then in any use |
5053 |
/// Pick a register which seems likely to be profitable, and then in any use |
| 5054 |
/// which has any reference to that register, delete all formulae which do not |
5054 |
/// which has any reference to that register, delete all formulae which do not |
| 5055 |
/// reference that register. |
5055 |
/// reference that register. |
| 5056 |
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { |
5056 |
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { |
| 5057 |
// With all other options exhausted, loop until the system is simple |
5057 |
// With all other options exhausted, loop until the system is simple |
| 5058 |
// enough to handle. |
5058 |
// enough to handle. |
| 5059 |
SmallPtrSet Taken; |
5059 |
SmallPtrSet Taken; |
| 5060 |
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) { |
5060 |
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) { |
| 5061 |
// Ok, we have too many of formulae on our hands to conveniently handle. |
5061 |
// Ok, we have too many of formulae on our hands to conveniently handle. |
| 5062 |
// Use a rough heuristic to thin out the list. |
5062 |
// Use a rough heuristic to thin out the list. |
| 5063 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
5063 |
LLVM_DEBUG(dbgs() << "The search space is too complex.\n"); |
| 5064 |
|
5064 |
|
| 5065 |
// Pick the register which is used by the most LSRUses, which is likely |
5065 |
// Pick the register which is used by the most LSRUses, which is likely |
| 5066 |
// to be a good reuse register candidate. |
5066 |
// to be a good reuse register candidate. |
| 5067 |
const SCEV *Best = nullptr; |
5067 |
const SCEV *Best = nullptr; |
| 5068 |
unsigned BestNum = 0; |
5068 |
unsigned BestNum = 0; |
| 5069 |
for (const SCEV *Reg : RegUses) { |
5069 |
for (const SCEV *Reg : RegUses) { |
| 5070 |
if (Taken.count(Reg)) |
5070 |
if (Taken.count(Reg)) |
| 5071 |
continue; |
5071 |
continue; |
| 5072 |
if (!Best) { |
5072 |
if (!Best) { |
| 5073 |
Best = Reg; |
5073 |
Best = Reg; |
| 5074 |
BestNum = RegUses.getUsedByIndices(Reg).count(); |
5074 |
BestNum = RegUses.getUsedByIndices(Reg).count(); |
| 5075 |
} else { |
5075 |
} else { |
| 5076 |
unsigned Count = RegUses.getUsedByIndices(Reg).count(); |
5076 |
unsigned Count = RegUses.getUsedByIndices(Reg).count(); |
| 5077 |
if (Count > BestNum) { |
5077 |
if (Count > BestNum) { |
| 5078 |
Best = Reg; |
5078 |
Best = Reg; |
| 5079 |
BestNum = Count; |
5079 |
BestNum = Count; |
| 5080 |
} |
5080 |
} |
| 5081 |
|
5081 |
|
| 5082 |
// If the scores are the same, but the Reg is simpler for the target |
5082 |
// If the scores are the same, but the Reg is simpler for the target |
| 5083 |
// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can |
5083 |
// (for example {x,+,1} as opposed to {x+C,+,1}, where the target can |
| 5084 |
// handle +C but not -C), opt for the simpler formula. |
5084 |
// handle +C but not -C), opt for the simpler formula. |
| 5085 |
if (Count == BestNum) { |
5085 |
if (Count == BestNum) { |
| 5086 |
int LUIdx = RegUses.getUsedByIndices(Reg).find_first(); |
5086 |
int LUIdx = RegUses.getUsedByIndices(Reg).find_first(); |
| 5087 |
if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address && |
5087 |
if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address && |
| 5088 |
IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg, |
5088 |
IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg, |
| 5089 |
Uses[LUIdx].AccessTy)) { |
5089 |
Uses[LUIdx].AccessTy)) { |
| 5090 |
Best = Reg; |
5090 |
Best = Reg; |
| 5091 |
BestNum = Count; |
5091 |
BestNum = Count; |
| 5092 |
} |
5092 |
} |
| 5093 |
} |
5093 |
} |
| 5094 |
} |
5094 |
} |
| 5095 |
} |
5095 |
} |
| 5096 |
assert(Best && "Failed to find best LSRUse candidate"); |
5096 |
assert(Best && "Failed to find best LSRUse candidate"); |
| 5097 |
|
5097 |
|
| 5098 |
LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best |
5098 |
LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best |
| 5099 |
<< " will yield profitable reuse.\n"); |
5099 |
<< " will yield profitable reuse.\n"); |
| 5100 |
Taken.insert(Best); |
5100 |
Taken.insert(Best); |
| 5101 |
|
5101 |
|
| 5102 |
// In any use with formulae which references this register, delete formulae |
5102 |
// In any use with formulae which references this register, delete formulae |
| 5103 |
// which don't reference it. |
5103 |
// which don't reference it. |
| 5104 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
5104 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { |
| 5105 |
LSRUse &LU = Uses[LUIdx]; |
5105 |
LSRUse &LU = Uses[LUIdx]; |
| 5106 |
if (!LU.Regs.count(Best)) continue; |
5106 |
if (!LU.Regs.count(Best)) continue; |
| 5107 |
|
5107 |
|
| 5108 |
bool Any = false; |
5108 |
bool Any = false; |
| 5109 |
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { |
5109 |
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { |
| 5110 |
Formula &F = LU.Formulae[i]; |
5110 |
Formula &F = LU.Formulae[i]; |
| 5111 |
if (!F.referencesReg(Best)) { |
5111 |
if (!F.referencesReg(Best)) { |
| 5112 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); |
5112 |
LLVM_DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); |
| 5113 |
LU.DeleteFormula(F); |
5113 |
LU.DeleteFormula(F); |
| 5114 |
--e; |
5114 |
--e; |
| 5115 |
--i; |
5115 |
--i; |
| 5116 |
Any = true; |
5116 |
Any = true; |
| 5117 |
assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?"); |
5117 |
assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?"); |
| 5118 |
continue; |
5118 |
continue; |
| 5119 |
} |
5119 |
} |
| 5120 |
} |
5120 |
} |
| 5121 |
|
5121 |
|
| 5122 |
if (Any) |
5122 |
if (Any) |
| 5123 |
LU.RecomputeRegs(LUIdx, RegUses); |
5123 |
LU.RecomputeRegs(LUIdx, RegUses); |
| 5124 |
} |
5124 |
} |
| 5125 |
|
5125 |
|
| 5126 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
5126 |
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); |
| 5127 |
} |
5127 |
} |
| 5128 |
} |
5128 |
} |
| 5129 |
|
5129 |
|
| 5130 |
/// If there are an extraordinary number of formulae to choose from, use some |
5130 |
/// If there are an extraordinary number of formulae to choose from, use some |
| 5131 |
/// rough heuristics to prune down the number of formulae. This keeps the main |
5131 |
/// rough heuristics to prune down the number of formulae. This keeps the main |
| 5132 |
/// solver from taking an extraordinary amount of time in some worst-case |
5132 |
/// solver from taking an extraordinary amount of time in some worst-case |
| 5133 |
/// scenarios. |
5133 |
/// scenarios. |
| 5134 |
void LSRInstance::NarrowSearchSpaceUsingHeuristics() { |
5134 |
void LSRInstance::NarrowSearchSpaceUsingHeuristics() { |
| 5135 |
NarrowSearchSpaceByDetectingSupersets(); |
5135 |
NarrowSearchSpaceByDetectingSupersets(); |
| 5136 |
NarrowSearchSpaceByCollapsingUnrolledCode(); |
5136 |
NarrowSearchSpaceByCollapsingUnrolledCode(); |
| 5137 |
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); |
5137 |
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); |
| 5138 |
if (FilterSameScaledReg) |
5138 |
if (FilterSameScaledReg) |
| 5139 |
NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); |
5139 |
NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); |
| 5140 |
NarrowSearchSpaceByFilterPostInc(); |
5140 |
NarrowSearchSpaceByFilterPostInc(); |
| 5141 |
if (LSRExpNarrow) |
5141 |
if (LSRExpNarrow) |
| 5142 |
NarrowSearchSpaceByDeletingCostlyFormulas(); |
5142 |
NarrowSearchSpaceByDeletingCostlyFormulas(); |
| 5143 |
else |
5143 |
else |
| 5144 |
NarrowSearchSpaceByPickingWinnerRegs(); |
5144 |
NarrowSearchSpaceByPickingWinnerRegs(); |
| 5145 |
} |
5145 |
} |
| 5146 |
|
5146 |
|
| 5147 |
/// This is the recursive solver. |
5147 |
/// This is the recursive solver. |
| 5148 |
void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, |
5148 |
void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, |
| 5149 |
Cost &SolutionCost, |
5149 |
Cost &SolutionCost, |
| 5150 |
SmallVectorImpl &Workspace, |
5150 |
SmallVectorImpl &Workspace, |
| 5151 |
const Cost &CurCost, |
5151 |
const Cost &CurCost, |
| 5152 |
const SmallPtrSet &CurRegs, |
5152 |
const SmallPtrSet &CurRegs, |
| 5153 |
DenseSet &VisitedRegs) const { |
5153 |
DenseSet &VisitedRegs) const { |
| 5154 |
// Some ideas: |
5154 |
// Some ideas: |
| 5155 |
// - prune more: |
5155 |
// - prune more: |
| 5156 |
// - use more aggressive filtering |
5156 |
// - use more aggressive filtering |
| 5157 |
// - sort the formula so that the most profitable solutions are found first |
5157 |
// - sort the formula so that the most profitable solutions are found first |
| 5158 |
// - sort the uses too |
5158 |
// - sort the uses too |
| 5159 |
// - search faster: |
5159 |
// - search faster: |
| 5160 |
// - don't compute a cost, and then compare. compare while computing a cost |
5160 |
// - don't compute a cost, and then compare. compare while computing a cost |
| 5161 |
// and bail early. |
5161 |
// and bail early. |
| 5162 |
// - track register sets with SmallBitVector |
5162 |
// - track register sets with SmallBitVector |
| 5163 |
|
5163 |
|
| 5164 |
const LSRUse &LU = Uses[Workspace.size()]; |
5164 |
const LSRUse &LU = Uses[Workspace.size()]; |
| 5165 |
|
5165 |
|
| 5166 |
// If this use references any register that's already a part of the |
5166 |
// If this use references any register that's already a part of the |
| 5167 |
// in-progress solution, consider it a requirement that a formula must |
5167 |
// in-progress solution, consider it a requirement that a formula must |
| 5168 |
// reference that register in order to be considered. This prunes out |
5168 |
// reference that register in order to be considered. This prunes out |
| 5169 |
// unprofitable searching. |
5169 |
// unprofitable searching. |
| 5170 |
SmallSetVector ReqRegs; |
5170 |
SmallSetVector ReqRegs; |
| 5171 |
for (const SCEV *S : CurRegs) |
5171 |
for (const SCEV *S : CurRegs) |
| 5172 |
if (LU.Regs.count(S)) |
5172 |
if (LU.Regs.count(S)) |
| 5173 |
ReqRegs.insert(S); |
5173 |
ReqRegs.insert(S); |
| 5174 |
|
5174 |
|
| 5175 |
SmallPtrSet NewRegs; |
5175 |
SmallPtrSet NewRegs; |
| 5176 |
Cost NewCost(L, SE, TTI, AMK); |
5176 |
Cost NewCost(L, SE, TTI, AMK); |
| 5177 |
for (const Formula &F : LU.Formulae) { |
5177 |
for (const Formula &F : LU.Formulae) { |
| 5178 |
// Ignore formulae which may not be ideal in terms of register reuse of |
5178 |
// Ignore formulae which may not be ideal in terms of register reuse of |
| 5179 |
// ReqRegs. The formula should use all required registers before |
5179 |
// ReqRegs. The formula should use all required registers before |
| 5180 |
// introducing new ones. |
5180 |
// introducing new ones. |
| 5181 |
// This can sometimes (notably when trying to favour postinc) lead to |
5181 |
// This can sometimes (notably when trying to favour postinc) lead to |
| 5182 |
// sub-optimial decisions. There it is best left to the cost modelling to |
5182 |
// sub-optimial decisions. There it is best left to the cost modelling to |
| 5183 |
// get correct. |
5183 |
// get correct. |
| 5184 |
if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) { |
5184 |
if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) { |
| 5185 |
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); |
5185 |
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); |
| 5186 |
for (const SCEV *Reg : ReqRegs) { |
5186 |
for (const SCEV *Reg : ReqRegs) { |
| 5187 |
if ((F.ScaledReg && F.ScaledReg == Reg) || |
5187 |
if ((F.ScaledReg && F.ScaledReg == Reg) || |
| 5188 |
is_contained(F.BaseRegs, Reg)) { |
5188 |
is_contained(F.BaseRegs, Reg)) { |
| 5189 |
--NumReqRegsToFind; |
5189 |
--NumReqRegsToFind; |
| 5190 |
if (NumReqRegsToFind == 0) |
5190 |
if (NumReqRegsToFind == 0) |
| 5191 |
break; |
5191 |
break; |
| 5192 |
} |
5192 |
} |
| 5193 |
} |
5193 |
} |
| 5194 |
if (NumReqRegsToFind != 0) { |
5194 |
if (NumReqRegsToFind != 0) { |
| 5195 |
// If none of the formulae satisfied the required registers, then we could |
5195 |
// If none of the formulae satisfied the required registers, then we could |
| 5196 |
// clear ReqRegs and try again. Currently, we simply give up in this case. |
5196 |
// clear ReqRegs and try again. Currently, we simply give up in this case. |
| 5197 |
continue; |
5197 |
continue; |
| 5198 |
} |
5198 |
} |
| 5199 |
} |
5199 |
} |
| 5200 |
|
5200 |
|
| 5201 |
// Evaluate the cost of the current formula. If it's already worse than |
5201 |
// Evaluate the cost of the current formula. If it's already worse than |
| 5202 |
// the current best, prune the search at that point. |
5202 |
// the current best, prune the search at that point. |
| 5203 |
NewCost = CurCost; |
5203 |
NewCost = CurCost; |
| 5204 |
NewRegs = CurRegs; |
5204 |
NewRegs = CurRegs; |
| 5205 |
NewCost.RateFormula(F, NewRegs, VisitedRegs, LU); |
5205 |
NewCost.RateFormula(F, NewRegs, VisitedRegs, LU); |
| 5206 |
if (NewCost.isLess(SolutionCost)) { |
5206 |
if (NewCost.isLess(SolutionCost)) { |
| 5207 |
Workspace.push_back(&F); |
5207 |
Workspace.push_back(&F); |
| 5208 |
if (Workspace.size() != Uses.size()) { |
5208 |
if (Workspace.size() != Uses.size()) { |
| 5209 |
SolveRecurse(Solution, SolutionCost, Workspace, NewCost, |
5209 |
SolveRecurse(Solution, SolutionCost, Workspace, NewCost, |
| 5210 |
NewRegs, VisitedRegs); |
5210 |
NewRegs, VisitedRegs); |
| 5211 |
if (F.getNumRegs() == 1 && Workspace.size() == 1) |
5211 |
if (F.getNumRegs() == 1 && Workspace.size() == 1) |
| 5212 |
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); |
5212 |
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); |
| 5213 |
} else { |
5213 |
} else { |
| 5214 |
LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); |
5214 |
LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); |
| 5215 |
dbgs() << ".\nRegs:\n"; |
5215 |
dbgs() << ".\nRegs:\n"; |
| 5216 |
for (const SCEV *S : NewRegs) dbgs() |
5216 |
for (const SCEV *S : NewRegs) dbgs() |
| 5217 |
<< "- " << *S << "\n"; |
5217 |
<< "- " << *S << "\n"; |
| 5218 |
dbgs() << '\n'); |
5218 |
dbgs() << '\n'); |
| 5219 |
|
5219 |
|
| 5220 |
SolutionCost = NewCost; |
5220 |
SolutionCost = NewCost; |
| 5221 |
Solution = Workspace; |
5221 |
Solution = Workspace; |
| 5222 |
} |
5222 |
} |
| 5223 |
Workspace.pop_back(); |
5223 |
Workspace.pop_back(); |
| 5224 |
} |
5224 |
} |
| 5225 |
} |
5225 |
} |
| 5226 |
} |
5226 |
} |
| 5227 |
|
5227 |
|
| 5228 |
/// Choose one formula from each use. Return the results in the given Solution |
5228 |
/// Choose one formula from each use. Return the results in the given Solution |
| 5229 |
/// vector. |
5229 |
/// vector. |
| 5230 |
void LSRInstance::Solve(SmallVectorImpl &Solution) const { |
5230 |
void LSRInstance::Solve(SmallVectorImpl &Solution) const { |
| 5231 |
SmallVector Workspace; |
5231 |
SmallVector Workspace; |
| 5232 |
Cost SolutionCost(L, SE, TTI, AMK); |
5232 |
Cost SolutionCost(L, SE, TTI, AMK); |
| 5233 |
SolutionCost.Lose(); |
5233 |
SolutionCost.Lose(); |
| 5234 |
Cost CurCost(L, SE, TTI, AMK); |
5234 |
Cost CurCost(L, SE, TTI, AMK); |
| 5235 |
SmallPtrSet CurRegs; |
5235 |
SmallPtrSet CurRegs; |
| 5236 |
DenseSet VisitedRegs; |
5236 |
DenseSet VisitedRegs; |
| 5237 |
Workspace.reserve(Uses.size()); |
5237 |
Workspace.reserve(Uses.size()); |
| 5238 |
|
5238 |
|
| 5239 |
// SolveRecurse does all the work. |
5239 |
// SolveRecurse does all the work. |
| 5240 |
SolveRecurse(Solution, SolutionCost, Workspace, CurCost, |
5240 |
SolveRecurse(Solution, SolutionCost, Workspace, CurCost, |
| 5241 |
CurRegs, VisitedRegs); |
5241 |
CurRegs, VisitedRegs); |
| 5242 |
if (Solution.empty()) { |
5242 |
if (Solution.empty()) { |
| 5243 |
LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n"); |
5243 |
LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n"); |
| 5244 |
return; |
5244 |
return; |
| 5245 |
} |
5245 |
} |
| 5246 |
|
5246 |
|
| 5247 |
// Ok, we've now made all our decisions. |
5247 |
// Ok, we've now made all our decisions. |
| 5248 |
LLVM_DEBUG(dbgs() << "\n" |
5248 |
LLVM_DEBUG(dbgs() << "\n" |
| 5249 |
"The chosen solution requires "; |
5249 |
"The chosen solution requires "; |
| 5250 |
SolutionCost.print(dbgs()); dbgs() << ":\n"; |
5250 |
SolutionCost.print(dbgs()); dbgs() << ":\n"; |
| 5251 |
for (size_t i = 0, e = Uses.size(); i != e; ++i) { |
5251 |
for (size_t i = 0, e = Uses.size(); i != e; ++i) { |
| 5252 |
dbgs() << " "; |
5252 |
dbgs() << " "; |
| 5253 |
Uses[i].print(dbgs()); |
5253 |
Uses[i].print(dbgs()); |
| 5254 |
dbgs() << "\n" |
5254 |
dbgs() << "\n" |
| 5255 |
" "; |
5255 |
" "; |
| 5256 |
Solution[i]->print(dbgs()); |
5256 |
Solution[i]->print(dbgs()); |
| 5257 |
dbgs() << '\n'; |
5257 |
dbgs() << '\n'; |
| 5258 |
}); |
5258 |
}); |
| 5259 |
|
5259 |
|
| 5260 |
assert(Solution.size() == Uses.size() && "Malformed solution!"); |
5260 |
assert(Solution.size() == Uses.size() && "Malformed solution!"); |
| 5261 |
|
5261 |
|
| 5262 |
if (BaselineCost.isLess(SolutionCost)) { |
5262 |
if (BaselineCost.isLess(SolutionCost)) { |
| 5263 |
LLVM_DEBUG(dbgs() << "The baseline solution requires "; |
5263 |
LLVM_DEBUG(dbgs() << "The baseline solution requires "; |
| 5264 |
BaselineCost.print(dbgs()); dbgs() << "\n"); |
5264 |
BaselineCost.print(dbgs()); dbgs() << "\n"); |
| 5265 |
if (!AllowDropSolutionIfLessProfitable) |
5265 |
if (!AllowDropSolutionIfLessProfitable) |
| 5266 |
LLVM_DEBUG( |
5266 |
LLVM_DEBUG( |
| 5267 |
dbgs() << "Baseline is more profitable than chosen solution, " |
5267 |
dbgs() << "Baseline is more profitable than chosen solution, " |
| 5268 |
"add option 'lsr-drop-solution' to drop LSR solution.\n"); |
5268 |
"add option 'lsr-drop-solution' to drop LSR solution.\n"); |
| 5269 |
else { |
5269 |
else { |
| 5270 |
LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen " |
5270 |
LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen " |
| 5271 |
"solution, dropping LSR solution.\n";); |
5271 |
"solution, dropping LSR solution.\n";); |
| 5272 |
Solution.clear(); |
5272 |
Solution.clear(); |
| 5273 |
} |
5273 |
} |
| 5274 |
} |
5274 |
} |
| 5275 |
} |
5275 |
} |
| 5276 |
|
5276 |
|
| 5277 |
/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as |
5277 |
/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as |
| 5278 |
/// we can go while still being dominated by the input positions. This helps |
5278 |
/// we can go while still being dominated by the input positions. This helps |
| 5279 |
/// canonicalize the insert position, which encourages sharing. |
5279 |
/// canonicalize the insert position, which encourages sharing. |
| 5280 |
BasicBlock::iterator |
5280 |
BasicBlock::iterator |
| 5281 |
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, |
5281 |
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, |
| 5282 |
const SmallVectorImpl &Inputs) |
5282 |
const SmallVectorImpl &Inputs) |
| 5283 |
const { |
5283 |
const { |
| 5284 |
Instruction *Tentative = &*IP; |
5284 |
Instruction *Tentative = &*IP; |
| 5285 |
while (true) { |
5285 |
while (true) { |
| 5286 |
bool AllDominate = true; |
5286 |
bool AllDominate = true; |
| 5287 |
Instruction *BetterPos = nullptr; |
5287 |
Instruction *BetterPos = nullptr; |
| 5288 |
// Don't bother attempting to insert before a catchswitch, their basic block |
5288 |
// Don't bother attempting to insert before a catchswitch, their basic block |
| 5289 |
// cannot have other non-PHI instructions. |
5289 |
// cannot have other non-PHI instructions. |
| 5290 |
if (isa(Tentative)) |
5290 |
if (isa(Tentative)) |
| 5291 |
return IP; |
5291 |
return IP; |
| 5292 |
|
5292 |
|
| 5293 |
for (Instruction *Inst : Inputs) { |
5293 |
for (Instruction *Inst : Inputs) { |
| 5294 |
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { |
5294 |
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { |
| 5295 |
AllDominate = false; |
5295 |
AllDominate = false; |
| 5296 |
break; |
5296 |
break; |
| 5297 |
} |
5297 |
} |
| 5298 |
// Attempt to find an insert position in the middle of the block, |
5298 |
// Attempt to find an insert position in the middle of the block, |
| 5299 |
// instead of at the end, so that it can be used for other expansions. |
5299 |
// instead of at the end, so that it can be used for other expansions. |
| 5300 |
if (Tentative->getParent() == Inst->getParent() && |
5300 |
if (Tentative->getParent() == Inst->getParent() && |
| 5301 |
(!BetterPos || !DT.dominates(Inst, BetterPos))) |
5301 |
(!BetterPos || !DT.dominates(Inst, BetterPos))) |
| 5302 |
BetterPos = &*std::next(BasicBlock::iterator(Inst)); |
5302 |
BetterPos = &*std::next(BasicBlock::iterator(Inst)); |
| 5303 |
} |
5303 |
} |
| 5304 |
if (!AllDominate) |
5304 |
if (!AllDominate) |
| 5305 |
break; |
5305 |
break; |
| 5306 |
if (BetterPos) |
5306 |
if (BetterPos) |
| 5307 |
IP = BetterPos->getIterator(); |
5307 |
IP = BetterPos->getIterator(); |
| 5308 |
else |
5308 |
else |
| 5309 |
IP = Tentative->getIterator(); |
5309 |
IP = Tentative->getIterator(); |
| 5310 |
|
5310 |
|
| 5311 |
const Loop *IPLoop = LI.getLoopFor(IP->getParent()); |
5311 |
const Loop *IPLoop = LI.getLoopFor(IP->getParent()); |
| 5312 |
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; |
5312 |
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; |
| 5313 |
|
5313 |
|
| 5314 |
BasicBlock *IDom; |
5314 |
BasicBlock *IDom; |
| 5315 |
for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { |
5315 |
for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { |
| 5316 |
if (!Rung) return IP; |
5316 |
if (!Rung) return IP; |
| 5317 |
Rung = Rung->getIDom(); |
5317 |
Rung = Rung->getIDom(); |
| 5318 |
if (!Rung) return IP; |
5318 |
if (!Rung) return IP; |
| 5319 |
IDom = Rung->getBlock(); |
5319 |
IDom = Rung->getBlock(); |
| 5320 |
|
5320 |
|
| 5321 |
// Don't climb into a loop though. |
5321 |
// Don't climb into a loop though. |
| 5322 |
const Loop *IDomLoop = LI.getLoopFor(IDom); |
5322 |
const Loop *IDomLoop = LI.getLoopFor(IDom); |
| 5323 |
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; |
5323 |
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; |
| 5324 |
if (IDomDepth <= IPLoopDepth && |
5324 |
if (IDomDepth <= IPLoopDepth && |
| 5325 |
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) |
5325 |
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) |
| 5326 |
break; |
5326 |
break; |
| 5327 |
} |
5327 |
} |
| 5328 |
|
5328 |
|
| 5329 |
Tentative = IDom->getTerminator(); |
5329 |
Tentative = IDom->getTerminator(); |
| 5330 |
} |
5330 |
} |
| 5331 |
|
5331 |
|
| 5332 |
return IP; |
5332 |
return IP; |
| 5333 |
} |
5333 |
} |
| 5334 |
|
5334 |
|
| 5335 |
/// Determine an input position which will be dominated by the operands and |
5335 |
/// Determine an input position which will be dominated by the operands and |
| 5336 |
/// which will dominate the result. |
5336 |
/// which will dominate the result. |
| 5337 |
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand( |
5337 |
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand( |
| 5338 |
BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const { |
5338 |
BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const { |
| 5339 |
// Collect some instructions which must be dominated by the |
5339 |
// Collect some instructions which must be dominated by the |
| 5340 |
// expanding replacement. These must be dominated by any operands that |
5340 |
// expanding replacement. These must be dominated by any operands that |
| 5341 |
// will be required in the expansion. |
5341 |
// will be required in the expansion. |
| 5342 |
SmallVector Inputs; |
5342 |
SmallVector Inputs; |
| 5343 |
if (Instruction *I = dyn_cast(LF.OperandValToReplace)) |
5343 |
if (Instruction *I = dyn_cast(LF.OperandValToReplace)) |
| 5344 |
Inputs.push_back(I); |
5344 |
Inputs.push_back(I); |
| 5345 |
if (LU.Kind == LSRUse::ICmpZero) |
5345 |
if (LU.Kind == LSRUse::ICmpZero) |
| 5346 |
if (Instruction *I = |
5346 |
if (Instruction *I = |
| 5347 |
dyn_cast(cast(LF.UserInst)->getOperand(1))) |
5347 |
dyn_cast(cast(LF.UserInst)->getOperand(1))) |
| 5348 |
Inputs.push_back(I); |
5348 |
Inputs.push_back(I); |
| 5349 |
if (LF.PostIncLoops.count(L)) { |
5349 |
if (LF.PostIncLoops.count(L)) { |
| 5350 |
if (LF.isUseFullyOutsideLoop(L)) |
5350 |
if (LF.isUseFullyOutsideLoop(L)) |
| 5351 |
Inputs.push_back(L->getLoopLatch()->getTerminator()); |
5351 |
Inputs.push_back(L->getLoopLatch()->getTerminator()); |
| 5352 |
else |
5352 |
else |
| 5353 |
Inputs.push_back(IVIncInsertPos); |
5353 |
Inputs.push_back(IVIncInsertPos); |
| 5354 |
} |
5354 |
} |
| 5355 |
// The expansion must also be dominated by the increment positions of any |
5355 |
// The expansion must also be dominated by the increment positions of any |
| 5356 |
// loops it for which it is using post-inc mode. |
5356 |
// loops it for which it is using post-inc mode. |
| 5357 |
for (const Loop *PIL : LF.PostIncLoops) { |
5357 |
for (const Loop *PIL : LF.PostIncLoops) { |
| 5358 |
if (PIL == L) continue; |
5358 |
if (PIL == L) continue; |
| 5359 |
|
5359 |
|
| 5360 |
// Be dominated by the loop exit. |
5360 |
// Be dominated by the loop exit. |
| 5361 |
SmallVector ExitingBlocks; |
5361 |
SmallVector ExitingBlocks; |
| 5362 |
PIL->getExitingBlocks(ExitingBlocks); |
5362 |
PIL->getExitingBlocks(ExitingBlocks); |
| 5363 |
if (!ExitingBlocks.empty()) { |
5363 |
if (!ExitingBlocks.empty()) { |
| 5364 |
BasicBlock *BB = ExitingBlocks[0]; |
5364 |
BasicBlock *BB = ExitingBlocks[0]; |
| 5365 |
for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i) |
5365 |
for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i) |
| 5366 |
BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]); |
5366 |
BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]); |
| 5367 |
Inputs.push_back(BB->getTerminator()); |
5367 |
Inputs.push_back(BB->getTerminator()); |
| 5368 |
} |
5368 |
} |
| 5369 |
} |
5369 |
} |
| 5370 |
|
5370 |
|
| 5371 |
assert(!isa(LowestIP) && !LowestIP->isEHPad() |
5371 |
assert(!isa(LowestIP) && !LowestIP->isEHPad() |
| 5372 |
&& !isa(LowestIP) && |
5372 |
&& !isa(LowestIP) && |
| 5373 |
"Insertion point must be a normal instruction"); |
5373 |
"Insertion point must be a normal instruction"); |
| 5374 |
|
5374 |
|
| 5375 |
// Then, climb up the immediate dominator tree as far as we can go while |
5375 |
// Then, climb up the immediate dominator tree as far as we can go while |
| 5376 |
// still being dominated by the input positions. |
5376 |
// still being dominated by the input positions. |
| 5377 |
BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs); |
5377 |
BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs); |
| 5378 |
|
5378 |
|
| 5379 |
// Don't insert instructions before PHI nodes. |
5379 |
// Don't insert instructions before PHI nodes. |
| 5380 |
while (isa(IP)) ++IP; |
5380 |
while (isa(IP)) ++IP; |
| 5381 |
|
5381 |
|
| 5382 |
// Ignore landingpad instructions. |
5382 |
// Ignore landingpad instructions. |
| 5383 |
while (IP->isEHPad()) ++IP; |
5383 |
while (IP->isEHPad()) ++IP; |
| 5384 |
|
5384 |
|
| 5385 |
// Ignore debug intrinsics. |
5385 |
// Ignore debug intrinsics. |
| 5386 |
while (isa(IP)) ++IP; |
5386 |
while (isa(IP)) ++IP; |
| 5387 |
|
5387 |
|
| 5388 |
// Set IP below instructions recently inserted by SCEVExpander. This keeps the |
5388 |
// Set IP below instructions recently inserted by SCEVExpander. This keeps the |
| 5389 |
// IP consistent across expansions and allows the previously inserted |
5389 |
// IP consistent across expansions and allows the previously inserted |
| 5390 |
// instructions to be reused by subsequent expansion. |
5390 |
// instructions to be reused by subsequent expansion. |
| 5391 |
while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP) |
5391 |
while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP) |
| 5392 |
++IP; |
5392 |
++IP; |
| 5393 |
|
5393 |
|
| 5394 |
return IP; |
5394 |
return IP; |
| 5395 |
} |
5395 |
} |
| 5396 |
|
5396 |
|
| 5397 |
/// Emit instructions for the leading candidate expression for this LSRUse (this |
5397 |
/// Emit instructions for the leading candidate expression for this LSRUse (this |
| 5398 |
/// is called "expanding"). |
5398 |
/// is called "expanding"). |
| 5399 |
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, |
5399 |
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, |
| 5400 |
const Formula &F, BasicBlock::iterator IP, |
5400 |
const Formula &F, BasicBlock::iterator IP, |
| 5401 |
SmallVectorImpl &DeadInsts) const { |
5401 |
SmallVectorImpl &DeadInsts) const { |
| 5402 |
if (LU.RigidFormula) |
5402 |
if (LU.RigidFormula) |
| 5403 |
return LF.OperandValToReplace; |
5403 |
return LF.OperandValToReplace; |
| 5404 |
|
5404 |
|
| 5405 |
// Determine an input position which will be dominated by the operands and |
5405 |
// Determine an input position which will be dominated by the operands and |
| 5406 |
// which will dominate the result. |
5406 |
// which will dominate the result. |
| 5407 |
IP = AdjustInsertPositionForExpand(IP, LF, LU); |
5407 |
IP = AdjustInsertPositionForExpand(IP, LF, LU); |
| 5408 |
Rewriter.setInsertPoint(&*IP); |
5408 |
Rewriter.setInsertPoint(&*IP); |
| 5409 |
|
5409 |
|
| 5410 |
// Inform the Rewriter if we have a post-increment use, so that it can |
5410 |
// Inform the Rewriter if we have a post-increment use, so that it can |
| 5411 |
// perform an advantageous expansion. |
5411 |
// perform an advantageous expansion. |
| 5412 |
Rewriter.setPostInc(LF.PostIncLoops); |
5412 |
Rewriter.setPostInc(LF.PostIncLoops); |
| 5413 |
|
5413 |
|
| 5414 |
// This is the type that the user actually needs. |
5414 |
// This is the type that the user actually needs. |
| 5415 |
Type *OpTy = LF.OperandValToReplace->getType(); |
5415 |
Type *OpTy = LF.OperandValToReplace->getType(); |
| 5416 |
// This will be the type that we'll initially expand to. |
5416 |
// This will be the type that we'll initially expand to. |
| 5417 |
Type *Ty = F.getType(); |
5417 |
Type *Ty = F.getType(); |
| 5418 |
if (!Ty) |
5418 |
if (!Ty) |
| 5419 |
// No type known; just expand directly to the ultimate type. |
5419 |
// No type known; just expand directly to the ultimate type. |
| 5420 |
Ty = OpTy; |
5420 |
Ty = OpTy; |
| 5421 |
else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy)) |
5421 |
else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy)) |
| 5422 |
// Expand directly to the ultimate type if it's the right size. |
5422 |
// Expand directly to the ultimate type if it's the right size. |
| 5423 |
Ty = OpTy; |
5423 |
Ty = OpTy; |
| 5424 |
// This is the type to do integer arithmetic in. |
5424 |
// This is the type to do integer arithmetic in. |
| 5425 |
Type *IntTy = SE.getEffectiveSCEVType(Ty); |
5425 |
Type *IntTy = SE.getEffectiveSCEVType(Ty); |
| 5426 |
|
5426 |
|
| 5427 |
// Build up a list of operands to add together to form the full base. |
5427 |
// Build up a list of operands to add together to form the full base. |
| 5428 |
SmallVector Ops; |
5428 |
SmallVector Ops; |
| 5429 |
|
5429 |
|
| 5430 |
// Expand the BaseRegs portion. |
5430 |
// Expand the BaseRegs portion. |
| 5431 |
for (const SCEV *Reg : F.BaseRegs) { |
5431 |
for (const SCEV *Reg : F.BaseRegs) { |
| 5432 |
assert(!Reg->isZero() && "Zero allocated in a base register!"); |
5432 |
assert(!Reg->isZero() && "Zero allocated in a base register!"); |
| 5433 |
|
5433 |
|
| 5434 |
// If we're expanding for a post-inc user, make the post-inc adjustment. |
5434 |
// If we're expanding for a post-inc user, make the post-inc adjustment. |
| 5435 |
Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE); |
5435 |
Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE); |
| 5436 |
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); |
5436 |
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); |
| 5437 |
} |
5437 |
} |
| 5438 |
|
5438 |
|
| 5439 |
// Expand the ScaledReg portion. |
5439 |
// Expand the ScaledReg portion. |
| 5440 |
Value *ICmpScaledV = nullptr; |
5440 |
Value *ICmpScaledV = nullptr; |
| 5441 |
if (F.Scale != 0) { |
5441 |
if (F.Scale != 0) { |
| 5442 |
const SCEV *ScaledS = F.ScaledReg; |
5442 |
const SCEV *ScaledS = F.ScaledReg; |
| 5443 |
|
5443 |
|
| 5444 |
// If we're expanding for a post-inc user, make the post-inc adjustment. |
5444 |
// If we're expanding for a post-inc user, make the post-inc adjustment. |
| 5445 |
PostIncLoopSet &Loops = const_cast(LF.PostIncLoops); |
5445 |
PostIncLoopSet &Loops = const_cast(LF.PostIncLoops); |
| 5446 |
ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE); |
5446 |
ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE); |
| 5447 |
|
5447 |
|
| 5448 |
if (LU.Kind == LSRUse::ICmpZero) { |
5448 |
if (LU.Kind == LSRUse::ICmpZero) { |
| 5449 |
// Expand ScaleReg as if it was part of the base regs. |
5449 |
// Expand ScaleReg as if it was part of the base regs. |
| 5450 |
if (F.Scale == 1) |
5450 |
if (F.Scale == 1) |
| 5451 |
Ops.push_back( |
5451 |
Ops.push_back( |
| 5452 |
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr))); |
5452 |
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr))); |
| 5453 |
else { |
5453 |
else { |
| 5454 |
// An interesting way of "folding" with an icmp is to use a negated |
5454 |
// An interesting way of "folding" with an icmp is to use a negated |
| 5455 |
// scale, which we'll implement by inserting it into the other operand |
5455 |
// scale, which we'll implement by inserting it into the other operand |
| 5456 |
// of the icmp. |
5456 |
// of the icmp. |
| 5457 |
assert(F.Scale == -1 && |
5457 |
assert(F.Scale == -1 && |
| 5458 |
"The only scale supported by ICmpZero uses is -1!"); |
5458 |
"The only scale supported by ICmpZero uses is -1!"); |
| 5459 |
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr); |
5459 |
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr); |
| 5460 |
} |
5460 |
} |
| 5461 |
} else { |
5461 |
} else { |
| 5462 |
// Otherwise just expand the scaled register and an explicit scale, |
5462 |
// Otherwise just expand the scaled register and an explicit scale, |
| 5463 |
// which is expected to be matched as part of the address. |
5463 |
// which is expected to be matched as part of the address. |
| 5464 |
|
5464 |
|
| 5465 |
// Flush the operand list to suppress SCEVExpander hoisting address modes. |
5465 |
// Flush the operand list to suppress SCEVExpander hoisting address modes. |
| 5466 |
// Unless the addressing mode will not be folded. |
5466 |
// Unless the addressing mode will not be folded. |
| 5467 |
if (!Ops.empty() && LU.Kind == LSRUse::Address && |
5467 |
if (!Ops.empty() && LU.Kind == LSRUse::Address && |
| 5468 |
isAMCompletelyFolded(TTI, LU, F)) { |
5468 |
isAMCompletelyFolded(TTI, LU, F)) { |
| 5469 |
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr); |
5469 |
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr); |
| 5470 |
Ops.clear(); |
5470 |
Ops.clear(); |
| 5471 |
Ops.push_back(SE.getUnknown(FullV)); |
5471 |
Ops.push_back(SE.getUnknown(FullV)); |
| 5472 |
} |
5472 |
} |
| 5473 |
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)); |
5473 |
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)); |
| 5474 |
if (F.Scale != 1) |
5474 |
if (F.Scale != 1) |
| 5475 |
ScaledS = |
5475 |
ScaledS = |
| 5476 |
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); |
5476 |
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); |
| 5477 |
Ops.push_back(ScaledS); |
5477 |
Ops.push_back(ScaledS); |
| 5478 |
} |
5478 |
} |
| 5479 |
} |
5479 |
} |
| 5480 |
|
5480 |
|
| 5481 |
// Expand the GV portion. |
5481 |
// Expand the GV portion. |
| 5482 |
if (F.BaseGV) { |
5482 |
if (F.BaseGV) { |
| 5483 |
// Flush the operand list to suppress SCEVExpander hoisting. |
5483 |
// Flush the operand list to suppress SCEVExpander hoisting. |
| 5484 |
if (!Ops.empty()) { |
5484 |
if (!Ops.empty()) { |
| 5485 |
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy); |
5485 |
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy); |
| 5486 |
Ops.clear(); |
5486 |
Ops.clear(); |
| 5487 |
Ops.push_back(SE.getUnknown(FullV)); |
5487 |
Ops.push_back(SE.getUnknown(FullV)); |
| 5488 |
} |
5488 |
} |
| 5489 |
Ops.push_back(SE.getUnknown(F.BaseGV)); |
5489 |
Ops.push_back(SE.getUnknown(F.BaseGV)); |
| 5490 |
} |
5490 |
} |
| 5491 |
|
5491 |
|
| 5492 |
// Flush the operand list to suppress SCEVExpander hoisting of both folded and |
5492 |
// Flush the operand list to suppress SCEVExpander hoisting of both folded and |
| 5493 |
// unfolded offsets. LSR assumes they both live next to their uses. |
5493 |
// unfolded offsets. LSR assumes they both live next to their uses. |
| 5494 |
if (!Ops.empty()) { |
5494 |
if (!Ops.empty()) { |
| 5495 |
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); |
5495 |
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); |
| 5496 |
Ops.clear(); |
5496 |
Ops.clear(); |
| 5497 |
Ops.push_back(SE.getUnknown(FullV)); |
5497 |
Ops.push_back(SE.getUnknown(FullV)); |
| 5498 |
} |
5498 |
} |
| 5499 |
|
5499 |
|
| 5500 |
// Expand the immediate portion. |
5500 |
// Expand the immediate portion. |
| 5501 |
int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset; |
5501 |
int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset; |
| 5502 |
if (Offset != 0) { |
5502 |
if (Offset != 0) { |
| 5503 |
if (LU.Kind == LSRUse::ICmpZero) { |
5503 |
if (LU.Kind == LSRUse::ICmpZero) { |
| 5504 |
// The other interesting way of "folding" with an ICmpZero is to use a |
5504 |
// The other interesting way of "folding" with an ICmpZero is to use a |
| 5505 |
// negated immediate. |
5505 |
// negated immediate. |
| 5506 |
if (!ICmpScaledV) |
5506 |
if (!ICmpScaledV) |
| 5507 |
ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); |
5507 |
ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); |
| 5508 |
else { |
5508 |
else { |
| 5509 |
Ops.push_back(SE.getUnknown(ICmpScaledV)); |
5509 |
Ops.push_back(SE.getUnknown(ICmpScaledV)); |
| 5510 |
ICmpScaledV = ConstantInt::get(IntTy, Offset); |
5510 |
ICmpScaledV = ConstantInt::get(IntTy, Offset); |
| 5511 |
} |
5511 |
} |
| 5512 |
} else { |
5512 |
} else { |
| 5513 |
// Just add the immediate values. These again are expected to be matched |
5513 |
// Just add the immediate values. These again are expected to be matched |
| 5514 |
// as part of the address. |
5514 |
// as part of the address. |
| 5515 |
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset))); |
5515 |
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset))); |
| 5516 |
} |
5516 |
} |
| 5517 |
} |
5517 |
} |
| 5518 |
|
5518 |
|
| 5519 |
// Expand the unfolded offset portion. |
5519 |
// Expand the unfolded offset portion. |
| 5520 |
int64_t UnfoldedOffset = F.UnfoldedOffset; |
5520 |
int64_t UnfoldedOffset = F.UnfoldedOffset; |
| 5521 |
if (UnfoldedOffset != 0) { |
5521 |
if (UnfoldedOffset != 0) { |
| 5522 |
// Just add the immediate values. |
5522 |
// Just add the immediate values. |
| 5523 |
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, |
5523 |
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, |
| 5524 |
UnfoldedOffset))); |
5524 |
UnfoldedOffset))); |
| 5525 |
} |
5525 |
} |
| 5526 |
|
5526 |
|
| 5527 |
// Emit instructions summing all the operands. |
5527 |
// Emit instructions summing all the operands. |
| 5528 |
const SCEV *FullS = Ops.empty() ? |
5528 |
const SCEV *FullS = Ops.empty() ? |
| 5529 |
SE.getConstant(IntTy, 0) : |
5529 |
SE.getConstant(IntTy, 0) : |
| 5530 |
SE.getAddExpr(Ops); |
5530 |
SE.getAddExpr(Ops); |
| 5531 |
Value *FullV = Rewriter.expandCodeFor(FullS, Ty); |
5531 |
Value *FullV = Rewriter.expandCodeFor(FullS, Ty); |
| 5532 |
|
5532 |
|
| 5533 |
// We're done expanding now, so reset the rewriter. |
5533 |
// We're done expanding now, so reset the rewriter. |
| 5534 |
Rewriter.clearPostInc(); |
5534 |
Rewriter.clearPostInc(); |
| 5535 |
|
5535 |
|
| 5536 |
// An ICmpZero Formula represents an ICmp which we're handling as a |
5536 |
// An ICmpZero Formula represents an ICmp which we're handling as a |
| 5537 |
// comparison against zero. Now that we've expanded an expression for that |
5537 |
// comparison against zero. Now that we've expanded an expression for that |
| 5538 |
// form, update the ICmp's other operand. |
5538 |
// form, update the ICmp's other operand. |
| 5539 |
if (LU.Kind == LSRUse::ICmpZero) { |
5539 |
if (LU.Kind == LSRUse::ICmpZero) { |
| 5540 |
ICmpInst *CI = cast(LF.UserInst); |
5540 |
ICmpInst *CI = cast(LF.UserInst); |
| 5541 |
if (auto *OperandIsInstr = dyn_cast(CI->getOperand(1))) |
5541 |
if (auto *OperandIsInstr = dyn_cast(CI->getOperand(1))) |
| 5542 |
DeadInsts.emplace_back(OperandIsInstr); |
5542 |
DeadInsts.emplace_back(OperandIsInstr); |
| 5543 |
assert(!F.BaseGV && "ICmp does not support folding a global value and " |
5543 |
assert(!F.BaseGV && "ICmp does not support folding a global value and " |
| 5544 |
"a scale at the same time!"); |
5544 |
"a scale at the same time!"); |
| 5545 |
if (F.Scale == -1) { |
5545 |
if (F.Scale == -1) { |
| 5546 |
if (ICmpScaledV->getType() != OpTy) { |
5546 |
if (ICmpScaledV->getType() != OpTy) { |
| 5547 |
Instruction *Cast = |
5547 |
Instruction *Cast = |
| 5548 |
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, |
5548 |
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, |
| 5549 |
OpTy, false), |
5549 |
OpTy, false), |
| 5550 |
ICmpScaledV, OpTy, "tmp", CI); |
5550 |
ICmpScaledV, OpTy, "tmp", CI); |
| 5551 |
ICmpScaledV = Cast; |
5551 |
ICmpScaledV = Cast; |
| 5552 |
} |
5552 |
} |
| 5553 |
CI->setOperand(1, ICmpScaledV); |
5553 |
CI->setOperand(1, ICmpScaledV); |
| 5554 |
} else { |
5554 |
} else { |
| 5555 |
// A scale of 1 means that the scale has been expanded as part of the |
5555 |
// A scale of 1 means that the scale has been expanded as part of the |
| 5556 |
// base regs. |
5556 |
// base regs. |
| 5557 |
assert((F.Scale == 0 || F.Scale == 1) && |
5557 |
assert((F.Scale == 0 || F.Scale == 1) && |
| 5558 |
"ICmp does not support folding a global value and " |
5558 |
"ICmp does not support folding a global value and " |
| 5559 |
"a scale at the same time!"); |
5559 |
"a scale at the same time!"); |
| 5560 |
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), |
5560 |
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), |
| 5561 |
-(uint64_t)Offset); |
5561 |
-(uint64_t)Offset); |
| 5562 |
if (C->getType() != OpTy) |
5562 |
if (C->getType() != OpTy) |
| 5563 |
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, |
5563 |
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, |
| 5564 |
OpTy, false), |
5564 |
OpTy, false), |
| 5565 |
C, OpTy); |
5565 |
C, OpTy); |
| 5566 |
|
5566 |
|
| 5567 |
CI->setOperand(1, C); |
5567 |
CI->setOperand(1, C); |
| 5568 |
} |
5568 |
} |
| 5569 |
} |
5569 |
} |
| 5570 |
|
5570 |
|
| 5571 |
return FullV; |
5571 |
return FullV; |
| 5572 |
} |
5572 |
} |
| 5573 |
|
5573 |
|
| 5574 |
/// Helper for Rewrite. PHI nodes are special because the use of their operands |
5574 |
/// Helper for Rewrite. PHI nodes are special because the use of their operands |
| 5575 |
/// effectively happens in their predecessor blocks, so the expression may need |
5575 |
/// effectively happens in their predecessor blocks, so the expression may need |
| 5576 |
/// to be expanded in multiple places. |
5576 |
/// to be expanded in multiple places. |
| 5577 |
void LSRInstance::RewriteForPHI( |
5577 |
void LSRInstance::RewriteForPHI( |
| 5578 |
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, |
5578 |
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, |
| 5579 |
SmallVectorImpl &DeadInsts) const { |
5579 |
SmallVectorImpl &DeadInsts) const { |
| 5580 |
DenseMap Inserted; |
5580 |
DenseMap Inserted; |
| 5581 |
|
5581 |
|
| 5582 |
// Inserting instructions in the loop and using them as PHI's input could |
5582 |
// Inserting instructions in the loop and using them as PHI's input could |
| 5583 |
// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the |
5583 |
// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the |
| 5584 |
// corresponding incoming block is not loop exiting). So collect all such |
5584 |
// corresponding incoming block is not loop exiting). So collect all such |
| 5585 |
// instructions to form LCSSA for them later. |
5585 |
// instructions to form LCSSA for them later. |
| 5586 |
SmallVector InsertedNonLCSSAInsts; |
5586 |
SmallVector InsertedNonLCSSAInsts; |
| 5587 |
|
5587 |
|
| 5588 |
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
5588 |
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) |
| 5589 |
if (PN->getIncomingValue(i) == LF.OperandValToReplace) { |
5589 |
if (PN->getIncomingValue(i) == LF.OperandValToReplace) { |
| 5590 |
bool needUpdateFixups = false; |
5590 |
bool needUpdateFixups = false; |
| 5591 |
BasicBlock *BB = PN->getIncomingBlock(i); |
5591 |
BasicBlock *BB = PN->getIncomingBlock(i); |
| 5592 |
|
5592 |
|
| 5593 |
// If this is a critical edge, split the edge so that we do not insert |
5593 |
// If this is a critical edge, split the edge so that we do not insert |
| 5594 |
// the code on all predecessor/successor paths. We do this unless this |
5594 |
// the code on all predecessor/successor paths. We do this unless this |
| 5595 |
// is the canonical backedge for this loop, which complicates post-inc |
5595 |
// is the canonical backedge for this loop, which complicates post-inc |
| 5596 |
// users. |
5596 |
// users. |
| 5597 |
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && |
5597 |
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && |
| 5598 |
!isa(BB->getTerminator()) && |
5598 |
!isa(BB->getTerminator()) && |
| 5599 |
!isa(BB->getTerminator())) { |
5599 |
!isa(BB->getTerminator())) { |
| 5600 |
BasicBlock *Parent = PN->getParent(); |
5600 |
BasicBlock *Parent = PN->getParent(); |
| 5601 |
Loop *PNLoop = LI.getLoopFor(Parent); |
5601 |
Loop *PNLoop = LI.getLoopFor(Parent); |
| 5602 |
if (!PNLoop || Parent != PNLoop->getHeader()) { |
5602 |
if (!PNLoop || Parent != PNLoop->getHeader()) { |
| 5603 |
// Split the critical edge. |
5603 |
// Split the critical edge. |
| 5604 |
BasicBlock *NewBB = nullptr; |
5604 |
BasicBlock *NewBB = nullptr; |
| 5605 |
if (!Parent->isLandingPad()) { |
5605 |
if (!Parent->isLandingPad()) { |
| 5606 |
NewBB = |
5606 |
NewBB = |
| 5607 |
SplitCriticalEdge(BB, Parent, |
5607 |
SplitCriticalEdge(BB, Parent, |
| 5608 |
CriticalEdgeSplittingOptions(&DT, &LI, MSSAU) |
5608 |
CriticalEdgeSplittingOptions(&DT, &LI, MSSAU) |
| 5609 |
.setMergeIdenticalEdges() |
5609 |
.setMergeIdenticalEdges() |
| 5610 |
.setKeepOneInputPHIs()); |
5610 |
.setKeepOneInputPHIs()); |
| 5611 |
} else { |
5611 |
} else { |
| 5612 |
SmallVector NewBBs; |
5612 |
SmallVector NewBBs; |
| 5613 |
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI); |
5613 |
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI); |
| 5614 |
NewBB = NewBBs[0]; |
5614 |
NewBB = NewBBs[0]; |
| 5615 |
} |
5615 |
} |
| 5616 |
// If NewBB==NULL, then SplitCriticalEdge refused to split because all |
5616 |
// If NewBB==NULL, then SplitCriticalEdge refused to split because all |
| 5617 |
// phi predecessors are identical. The simple thing to do is skip |
5617 |
// phi predecessors are identical. The simple thing to do is skip |
| 5618 |
// splitting in this case rather than complicate the API. |
5618 |
// splitting in this case rather than complicate the API. |
| 5619 |
if (NewBB) { |
5619 |
if (NewBB) { |
| 5620 |
// If PN is outside of the loop and BB is in the loop, we want to |
5620 |
// If PN is outside of the loop and BB is in the loop, we want to |
| 5621 |
// move the block to be immediately before the PHI block, not |
5621 |
// move the block to be immediately before the PHI block, not |
| 5622 |
// immediately after BB. |
5622 |
// immediately after BB. |
| 5623 |
if (L->contains(BB) && !L->contains(PN)) |
5623 |
if (L->contains(BB) && !L->contains(PN)) |
| 5624 |
NewBB->moveBefore(PN->getParent()); |
5624 |
NewBB->moveBefore(PN->getParent()); |
| 5625 |
|
5625 |
|
| 5626 |
// Splitting the edge can reduce the number of PHI entries we have. |
5626 |
// Splitting the edge can reduce the number of PHI entries we have. |
| 5627 |
e = PN->getNumIncomingValues(); |
5627 |
e = PN->getNumIncomingValues(); |
| 5628 |
BB = NewBB; |
5628 |
BB = NewBB; |
| 5629 |
i = PN->getBasicBlockIndex(BB); |
5629 |
i = PN->getBasicBlockIndex(BB); |
| 5630 |
|
5630 |
|
| 5631 |
needUpdateFixups = true; |
5631 |
needUpdateFixups = true; |
| 5632 |
} |
5632 |
} |
| 5633 |
} |
5633 |
} |
| 5634 |
} |
5634 |
} |
| 5635 |
|
5635 |
|
| 5636 |
std::pair::iterator, bool> Pair = |
5636 |
std::pair::iterator, bool> Pair = |
| 5637 |
Inserted.insert(std::make_pair(BB, static_cast(nullptr))); |
5637 |
Inserted.insert(std::make_pair(BB, static_cast(nullptr))); |
| 5638 |
if (!Pair.second) |
5638 |
if (!Pair.second) |
| 5639 |
PN->setIncomingValue(i, Pair.first->second); |
5639 |
PN->setIncomingValue(i, Pair.first->second); |
| 5640 |
else { |
5640 |
else { |
| 5641 |
Value *FullV = |
5641 |
Value *FullV = |
| 5642 |
Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts); |
5642 |
Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts); |
| 5643 |
|
5643 |
|
| 5644 |
// If this is reuse-by-noop-cast, insert the noop cast. |
5644 |
// If this is reuse-by-noop-cast, insert the noop cast. |
| 5645 |
Type *OpTy = LF.OperandValToReplace->getType(); |
5645 |
Type *OpTy = LF.OperandValToReplace->getType(); |
| 5646 |
if (FullV->getType() != OpTy) |
5646 |
if (FullV->getType() != OpTy) |
| 5647 |
FullV = |
5647 |
FullV = |
| 5648 |
CastInst::Create(CastInst::getCastOpcode(FullV, false, |
5648 |
CastInst::Create(CastInst::getCastOpcode(FullV, false, |
| 5649 |
OpTy, false), |
5649 |
OpTy, false), |
| 5650 |
FullV, LF.OperandValToReplace->getType(), |
5650 |
FullV, LF.OperandValToReplace->getType(), |
| 5651 |
"tmp", BB->getTerminator()); |
5651 |
"tmp", BB->getTerminator()); |
| 5652 |
|
5652 |
|
| 5653 |
// If the incoming block for this value is not in the loop, it means the |
5653 |
// If the incoming block for this value is not in the loop, it means the |
| 5654 |
// current PHI is not in a loop exit, so we must create a LCSSA PHI for |
5654 |
// current PHI is not in a loop exit, so we must create a LCSSA PHI for |
| 5655 |
// the inserted value. |
5655 |
// the inserted value. |
| 5656 |
if (auto *I = dyn_cast(FullV)) |
5656 |
if (auto *I = dyn_cast(FullV)) |
| 5657 |
if (L->contains(I) && !L->contains(BB)) |
5657 |
if (L->contains(I) && !L->contains(BB)) |
| 5658 |
InsertedNonLCSSAInsts.push_back(I); |
5658 |
InsertedNonLCSSAInsts.push_back(I); |
| 5659 |
|
5659 |
|
| 5660 |
PN->setIncomingValue(i, FullV); |
5660 |
PN->setIncomingValue(i, FullV); |
| 5661 |
Pair.first->second = FullV; |
5661 |
Pair.first->second = FullV; |
| 5662 |
} |
5662 |
} |
| 5663 |
|
5663 |
|
| 5664 |
// If LSR splits critical edge and phi node has other pending |
5664 |
// If LSR splits critical edge and phi node has other pending |
| 5665 |
// fixup operands, we need to update those pending fixups. Otherwise |
5665 |
// fixup operands, we need to update those pending fixups. Otherwise |
| 5666 |
// formulae will not be implemented completely and some instructions |
5666 |
// formulae will not be implemented completely and some instructions |
| 5667 |
// will not be eliminated. |
5667 |
// will not be eliminated. |
| 5668 |
if (needUpdateFixups) { |
5668 |
if (needUpdateFixups) { |
| 5669 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) |
5669 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) |
| 5670 |
for (LSRFixup &Fixup : Uses[LUIdx].Fixups) |
5670 |
for (LSRFixup &Fixup : Uses[LUIdx].Fixups) |
| 5671 |
// If fixup is supposed to rewrite some operand in the phi |
5671 |
// If fixup is supposed to rewrite some operand in the phi |
| 5672 |
// that was just updated, it may be already moved to |
5672 |
// that was just updated, it may be already moved to |
| 5673 |
// another phi node. Such fixup requires update. |
5673 |
// another phi node. Such fixup requires update. |
| 5674 |
if (Fixup.UserInst == PN) { |
5674 |
if (Fixup.UserInst == PN) { |
| 5675 |
// Check if the operand we try to replace still exists in the |
5675 |
// Check if the operand we try to replace still exists in the |
| 5676 |
// original phi. |
5676 |
// original phi. |
| 5677 |
bool foundInOriginalPHI = false; |
5677 |
bool foundInOriginalPHI = false; |
| 5678 |
for (const auto &val : PN->incoming_values()) |
5678 |
for (const auto &val : PN->incoming_values()) |
| 5679 |
if (val == Fixup.OperandValToReplace) { |
5679 |
if (val == Fixup.OperandValToReplace) { |
| 5680 |
foundInOriginalPHI = true; |
5680 |
foundInOriginalPHI = true; |
| 5681 |
break; |
5681 |
break; |
| 5682 |
} |
5682 |
} |
| 5683 |
|
5683 |
|
| 5684 |
// If fixup operand found in original PHI - nothing to do. |
5684 |
// If fixup operand found in original PHI - nothing to do. |
| 5685 |
if (foundInOriginalPHI) |
5685 |
if (foundInOriginalPHI) |
| 5686 |
continue; |
5686 |
continue; |
| 5687 |
|
5687 |
|
| 5688 |
// Otherwise it might be moved to another PHI and requires update. |
5688 |
// Otherwise it might be moved to another PHI and requires update. |
| 5689 |
// If fixup operand not found in any of the incoming blocks that |
5689 |
// If fixup operand not found in any of the incoming blocks that |
| 5690 |
// means we have already rewritten it - nothing to do. |
5690 |
// means we have already rewritten it - nothing to do. |
| 5691 |
for (const auto &Block : PN->blocks()) |
5691 |
for (const auto &Block : PN->blocks()) |
| 5692 |
for (BasicBlock::iterator I = Block->begin(); isa(I); |
5692 |
for (BasicBlock::iterator I = Block->begin(); isa(I); |
| 5693 |
++I) { |
5693 |
++I) { |
| 5694 |
PHINode *NewPN = cast(I); |
5694 |
PHINode *NewPN = cast(I); |
| 5695 |
for (const auto &val : NewPN->incoming_values()) |
5695 |
for (const auto &val : NewPN->incoming_values()) |
| 5696 |
if (val == Fixup.OperandValToReplace) |
5696 |
if (val == Fixup.OperandValToReplace) |
| 5697 |
Fixup.UserInst = NewPN; |
5697 |
Fixup.UserInst = NewPN; |
| 5698 |
} |
5698 |
} |
| 5699 |
} |
5699 |
} |
| 5700 |
} |
5700 |
} |
| 5701 |
} |
5701 |
} |
| 5702 |
|
5702 |
|
| 5703 |
formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE); |
5703 |
formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE); |
| 5704 |
} |
5704 |
} |
| 5705 |
|
5705 |
|
| 5706 |
/// Emit instructions for the leading candidate expression for this LSRUse (this |
5706 |
/// Emit instructions for the leading candidate expression for this LSRUse (this |
| 5707 |
/// is called "expanding"), and update the UserInst to reference the newly |
5707 |
/// is called "expanding"), and update the UserInst to reference the newly |
| 5708 |
/// expanded value. |
5708 |
/// expanded value. |
| 5709 |
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, |
5709 |
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, |
| 5710 |
const Formula &F, |
5710 |
const Formula &F, |
| 5711 |
SmallVectorImpl &DeadInsts) const { |
5711 |
SmallVectorImpl &DeadInsts) const { |
| 5712 |
// First, find an insertion point that dominates UserInst. For PHI nodes, |
5712 |
// First, find an insertion point that dominates UserInst. For PHI nodes, |
| 5713 |
// find the nearest block which dominates all the relevant uses. |
5713 |
// find the nearest block which dominates all the relevant uses. |
| 5714 |
if (PHINode *PN = dyn_cast(LF.UserInst)) { |
5714 |
if (PHINode *PN = dyn_cast(LF.UserInst)) { |
| 5715 |
RewriteForPHI(PN, LU, LF, F, DeadInsts); |
5715 |
RewriteForPHI(PN, LU, LF, F, DeadInsts); |
| 5716 |
} else { |
5716 |
} else { |
| 5717 |
Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts); |
5717 |
Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts); |
| 5718 |
|
5718 |
|
| 5719 |
// If this is reuse-by-noop-cast, insert the noop cast. |
5719 |
// If this is reuse-by-noop-cast, insert the noop cast. |
| 5720 |
Type *OpTy = LF.OperandValToReplace->getType(); |
5720 |
Type *OpTy = LF.OperandValToReplace->getType(); |
| 5721 |
if (FullV->getType() != OpTy) { |
5721 |
if (FullV->getType() != OpTy) { |
| 5722 |
Instruction *Cast = |
5722 |
Instruction *Cast = |
| 5723 |
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), |
5723 |
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), |
| 5724 |
FullV, OpTy, "tmp", LF.UserInst); |
5724 |
FullV, OpTy, "tmp", LF.UserInst); |
| 5725 |
FullV = Cast; |
5725 |
FullV = Cast; |
| 5726 |
} |
5726 |
} |
| 5727 |
|
5727 |
|
| 5728 |
// Update the user. ICmpZero is handled specially here (for now) because |
5728 |
// Update the user. ICmpZero is handled specially here (for now) because |
| 5729 |
// Expand may have updated one of the operands of the icmp already, and |
5729 |
// Expand may have updated one of the operands of the icmp already, and |
| 5730 |
// its new value may happen to be equal to LF.OperandValToReplace, in |
5730 |
// its new value may happen to be equal to LF.OperandValToReplace, in |
| 5731 |
// which case doing replaceUsesOfWith leads to replacing both operands |
5731 |
// which case doing replaceUsesOfWith leads to replacing both operands |
| 5732 |
// with the same value. TODO: Reorganize this. |
5732 |
// with the same value. TODO: Reorganize this. |
| 5733 |
if (LU.Kind == LSRUse::ICmpZero) |
5733 |
if (LU.Kind == LSRUse::ICmpZero) |
| 5734 |
LF.UserInst->setOperand(0, FullV); |
5734 |
LF.UserInst->setOperand(0, FullV); |
| 5735 |
else |
5735 |
else |
| 5736 |
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); |
5736 |
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); |
| 5737 |
} |
5737 |
} |
| 5738 |
|
5738 |
|
| 5739 |
if (auto *OperandIsInstr = dyn_cast(LF.OperandValToReplace)) |
5739 |
if (auto *OperandIsInstr = dyn_cast(LF.OperandValToReplace)) |
| 5740 |
DeadInsts.emplace_back(OperandIsInstr); |
5740 |
DeadInsts.emplace_back(OperandIsInstr); |
| 5741 |
} |
5741 |
} |
| 5742 |
|
5742 |
|
| 5743 |
// Trying to hoist the IVInc to loop header if all IVInc users are in |
5743 |
// Trying to hoist the IVInc to loop header if all IVInc users are in |
| 5744 |
// the loop header. It will help backend to generate post index load/store |
5744 |
// the loop header. It will help backend to generate post index load/store |
| 5745 |
// when the latch block is different from loop header block. |
5745 |
// when the latch block is different from loop header block. |
| 5746 |
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, |
5746 |
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, |
| 5747 |
const LSRUse &LU, Instruction *IVIncInsertPos, |
5747 |
const LSRUse &LU, Instruction *IVIncInsertPos, |
| 5748 |
Loop *L) { |
5748 |
Loop *L) { |
| 5749 |
if (LU.Kind != LSRUse::Address) |
5749 |
if (LU.Kind != LSRUse::Address) |
| 5750 |
return false; |
5750 |
return false; |
| 5751 |
|
5751 |
|
| 5752 |
// For now this code do the conservative optimization, only work for |
5752 |
// For now this code do the conservative optimization, only work for |
| 5753 |
// the header block. Later we can hoist the IVInc to the block post |
5753 |
// the header block. Later we can hoist the IVInc to the block post |
| 5754 |
// dominate all users. |
5754 |
// dominate all users. |
| 5755 |
BasicBlock *LHeader = L->getHeader(); |
5755 |
BasicBlock *LHeader = L->getHeader(); |
| 5756 |
if (IVIncInsertPos->getParent() == LHeader) |
5756 |
if (IVIncInsertPos->getParent() == LHeader) |
| 5757 |
return false; |
5757 |
return false; |
| 5758 |
|
5758 |
|
| 5759 |
if (!Fixup.OperandValToReplace || |
5759 |
if (!Fixup.OperandValToReplace || |
| 5760 |
any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) { |
5760 |
any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) { |
| 5761 |
Instruction *UI = cast(U); |
5761 |
Instruction *UI = cast(U); |
| 5762 |
return UI->getParent() != LHeader; |
5762 |
return UI->getParent() != LHeader; |
| 5763 |
})) |
5763 |
})) |
| 5764 |
return false; |
5764 |
return false; |
| 5765 |
|
5765 |
|
| 5766 |
Instruction *I = Fixup.UserInst; |
5766 |
Instruction *I = Fixup.UserInst; |
| 5767 |
Type *Ty = I->getType(); |
5767 |
Type *Ty = I->getType(); |
| 5768 |
return Ty->isIntegerTy() && |
5768 |
return Ty->isIntegerTy() && |
| 5769 |
((isa(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) || |
5769 |
((isa(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) || |
| 5770 |
(isa(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty))); |
5770 |
(isa(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty))); |
| 5771 |
} |
5771 |
} |
| 5772 |
|
5772 |
|
| 5773 |
/// Rewrite all the fixup locations with new values, following the chosen |
5773 |
/// Rewrite all the fixup locations with new values, following the chosen |
| 5774 |
/// solution. |
5774 |
/// solution. |
| 5775 |
void LSRInstance::ImplementSolution( |
5775 |
void LSRInstance::ImplementSolution( |
| 5776 |
const SmallVectorImpl &Solution) { |
5776 |
const SmallVectorImpl &Solution) { |
| 5777 |
// Keep track of instructions we may have made dead, so that |
5777 |
// Keep track of instructions we may have made dead, so that |
| 5778 |
// we can remove them after we are done working. |
5778 |
// we can remove them after we are done working. |
| 5779 |
SmallVector DeadInsts; |
5779 |
SmallVector DeadInsts; |
| 5780 |
|
5780 |
|
| 5781 |
// Mark phi nodes that terminate chains so the expander tries to reuse them. |
5781 |
// Mark phi nodes that terminate chains so the expander tries to reuse them. |
| 5782 |
for (const IVChain &Chain : IVChainVec) { |
5782 |
for (const IVChain &Chain : IVChainVec) { |
| 5783 |
if (PHINode *PN = dyn_cast(Chain.tailUserInst())) |
5783 |
if (PHINode *PN = dyn_cast(Chain.tailUserInst())) |
| 5784 |
Rewriter.setChainedPhi(PN); |
5784 |
Rewriter.setChainedPhi(PN); |
| 5785 |
} |
5785 |
} |
| 5786 |
|
5786 |
|
| 5787 |
// Expand the new value definitions and update the users. |
5787 |
// Expand the new value definitions and update the users. |
| 5788 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) |
5788 |
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) |
| 5789 |
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { |
5789 |
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { |
| 5790 |
Instruction *InsertPos = |
5790 |
Instruction *InsertPos = |
| 5791 |
canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) |
5791 |
canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) |
| 5792 |
? L->getHeader()->getTerminator() |
5792 |
? L->getHeader()->getTerminator() |
| 5793 |
: IVIncInsertPos; |
5793 |
: IVIncInsertPos; |
| 5794 |
Rewriter.setIVIncInsertPos(L, InsertPos); |
5794 |
Rewriter.setIVIncInsertPos(L, InsertPos); |
| 5795 |
Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); |
5795 |
Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); |
| 5796 |
Changed = true; |
5796 |
Changed = true; |
| 5797 |
} |
5797 |
} |
| 5798 |
|
5798 |
|
| 5799 |
for (const IVChain &Chain : IVChainVec) { |
5799 |
for (const IVChain &Chain : IVChainVec) { |
| 5800 |
GenerateIVChain(Chain, DeadInsts); |
5800 |
GenerateIVChain(Chain, DeadInsts); |
| 5801 |
Changed = true; |
5801 |
Changed = true; |
| 5802 |
} |
5802 |
} |
| 5803 |
|
5803 |
|
| 5804 |
for (const WeakVH &IV : Rewriter.getInsertedIVs()) |
5804 |
for (const WeakVH &IV : Rewriter.getInsertedIVs()) |
| 5805 |
if (IV && dyn_cast(&*IV)->getParent()) |
5805 |
if (IV && dyn_cast(&*IV)->getParent()) |
| 5806 |
ScalarEvolutionIVs.push_back(IV); |
5806 |
ScalarEvolutionIVs.push_back(IV); |
| 5807 |
|
5807 |
|
| 5808 |
// Clean up after ourselves. This must be done before deleting any |
5808 |
// Clean up after ourselves. This must be done before deleting any |
| 5809 |
// instructions. |
5809 |
// instructions. |
| 5810 |
Rewriter.clear(); |
5810 |
Rewriter.clear(); |
| 5811 |
|
5811 |
|
| 5812 |
Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, |
5812 |
Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, |
| 5813 |
&TLI, MSSAU); |
5813 |
&TLI, MSSAU); |
| 5814 |
|
5814 |
|
| 5815 |
// In our cost analysis above, we assume that each addrec consumes exactly |
5815 |
// In our cost analysis above, we assume that each addrec consumes exactly |
| 5816 |
// one register, and arrange to have increments inserted just before the |
5816 |
// one register, and arrange to have increments inserted just before the |
| 5817 |
// latch to maximimize the chance this is true. However, if we reused |
5817 |
// latch to maximimize the chance this is true. However, if we reused |
| 5818 |
// existing IVs, we now need to move the increments to match our |
5818 |
// existing IVs, we now need to move the increments to match our |
| 5819 |
// expectations. Otherwise, our cost modeling results in us having a |
5819 |
// expectations. Otherwise, our cost modeling results in us having a |
| 5820 |
// chosen a non-optimal result for the actual schedule. (And yes, this |
5820 |
// chosen a non-optimal result for the actual schedule. (And yes, this |
| 5821 |
// scheduling decision does impact later codegen.) |
5821 |
// scheduling decision does impact later codegen.) |
| 5822 |
for (PHINode &PN : L->getHeader()->phis()) { |
5822 |
for (PHINode &PN : L->getHeader()->phis()) { |
| 5823 |
BinaryOperator *BO = nullptr; |
5823 |
BinaryOperator *BO = nullptr; |
| 5824 |
Value *Start = nullptr, *Step = nullptr; |
5824 |
Value *Start = nullptr, *Step = nullptr; |
| 5825 |
if (!matchSimpleRecurrence(&PN, BO, Start, Step)) |
5825 |
if (!matchSimpleRecurrence(&PN, BO, Start, Step)) |
| 5826 |
continue; |
5826 |
continue; |
| 5827 |
|
5827 |
|
| 5828 |
switch (BO->getOpcode()) { |
5828 |
switch (BO->getOpcode()) { |
| 5829 |
case Instruction::Sub: |
5829 |
case Instruction::Sub: |
| 5830 |
if (BO->getOperand(0) != &PN) |
5830 |
if (BO->getOperand(0) != &PN) |
| 5831 |
// sub is non-commutative - match handling elsewhere in LSR |
5831 |
// sub is non-commutative - match handling elsewhere in LSR |
| 5832 |
continue; |
5832 |
continue; |
| 5833 |
break; |
5833 |
break; |
| 5834 |
case Instruction::Add: |
5834 |
case Instruction::Add: |
| 5835 |
break; |
5835 |
break; |
| 5836 |
default: |
5836 |
default: |
| 5837 |
continue; |
5837 |
continue; |
| 5838 |
}; |
5838 |
}; |
| 5839 |
|
5839 |
|
| 5840 |
if (!isa(Step)) |
5840 |
if (!isa(Step)) |
| 5841 |
// If not a constant step, might increase register pressure |
5841 |
// If not a constant step, might increase register pressure |
| 5842 |
// (We assume constants have been canonicalized to RHS) |
5842 |
// (We assume constants have been canonicalized to RHS) |
| 5843 |
continue; |
5843 |
continue; |
| 5844 |
|
5844 |
|
| 5845 |
if (BO->getParent() == IVIncInsertPos->getParent()) |
5845 |
if (BO->getParent() == IVIncInsertPos->getParent()) |
| 5846 |
// Only bother moving across blocks. Isel can handle block local case. |
5846 |
// Only bother moving across blocks. Isel can handle block local case. |
| 5847 |
continue; |
5847 |
continue; |
| 5848 |
|
5848 |
|
| 5849 |
// Can we legally schedule inc at the desired point? |
5849 |
// Can we legally schedule inc at the desired point? |
| 5850 |
if (!llvm::all_of(BO->uses(), |
5850 |
if (!llvm::all_of(BO->uses(), |
| 5851 |
[&](Use &U) {return DT.dominates(IVIncInsertPos, U);})) |
5851 |
[&](Use &U) {return DT.dominates(IVIncInsertPos, U);})) |
| 5852 |
continue; |
5852 |
continue; |
| 5853 |
BO->moveBefore(IVIncInsertPos); |
5853 |
BO->moveBefore(IVIncInsertPos); |
| 5854 |
Changed = true; |
5854 |
Changed = true; |
| 5855 |
} |
5855 |
} |
| 5856 |
|
5856 |
|
| 5857 |
|
5857 |
|
| 5858 |
} |
5858 |
} |
| 5859 |
|
5859 |
|
| 5860 |
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, |
5860 |
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, |
| 5861 |
DominatorTree &DT, LoopInfo &LI, |
5861 |
DominatorTree &DT, LoopInfo &LI, |
| 5862 |
const TargetTransformInfo &TTI, AssumptionCache &AC, |
5862 |
const TargetTransformInfo &TTI, AssumptionCache &AC, |
| 5863 |
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) |
5863 |
TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) |
| 5864 |
: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), |
5864 |
: IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), |
| 5865 |
MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 |
5865 |
MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 |
| 5866 |
? PreferredAddresingMode |
5866 |
? PreferredAddresingMode |
| 5867 |
: TTI.getPreferredAddressingMode(L, &SE)), |
5867 |
: TTI.getPreferredAddressingMode(L, &SE)), |
| 5868 |
Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false), |
5868 |
Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false), |
| 5869 |
BaselineCost(L, SE, TTI, AMK) { |
5869 |
BaselineCost(L, SE, TTI, AMK) { |
| 5870 |
// If LoopSimplify form is not available, stay out of trouble. |
5870 |
// If LoopSimplify form is not available, stay out of trouble. |
| 5871 |
if (!L->isLoopSimplifyForm()) |
5871 |
if (!L->isLoopSimplifyForm()) |
| 5872 |
return; |
5872 |
return; |
| 5873 |
|
5873 |
|
| 5874 |
// If there's no interesting work to be done, bail early. |
5874 |
// If there's no interesting work to be done, bail early. |
| 5875 |
if (IU.empty()) return; |
5875 |
if (IU.empty()) return; |
| 5876 |
|
5876 |
|
| 5877 |
// If there's too much analysis to be done, bail early. We won't be able to |
5877 |
// If there's too much analysis to be done, bail early. We won't be able to |
| 5878 |
// model the problem anyway. |
5878 |
// model the problem anyway. |
| 5879 |
unsigned NumUsers = 0; |
5879 |
unsigned NumUsers = 0; |
| 5880 |
for (const IVStrideUse &U : IU) { |
5880 |
for (const IVStrideUse &U : IU) { |
| 5881 |
if (++NumUsers > MaxIVUsers) { |
5881 |
if (++NumUsers > MaxIVUsers) { |
| 5882 |
(void)U; |
5882 |
(void)U; |
| 5883 |
LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U |
5883 |
LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U |
| 5884 |
<< "\n"); |
5884 |
<< "\n"); |
| 5885 |
return; |
5885 |
return; |
| 5886 |
} |
5886 |
} |
| 5887 |
// Bail out if we have a PHI on an EHPad that gets a value from a |
5887 |
// Bail out if we have a PHI on an EHPad that gets a value from a |
| 5888 |
// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is |
5888 |
// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is |
| 5889 |
// no good place to stick any instructions. |
5889 |
// no good place to stick any instructions. |
| 5890 |
if (auto *PN = dyn_cast(U.getUser())) { |
5890 |
if (auto *PN = dyn_cast(U.getUser())) { |
| 5891 |
auto *FirstNonPHI = PN->getParent()->getFirstNonPHI(); |
5891 |
auto *FirstNonPHI = PN->getParent()->getFirstNonPHI(); |
| 5892 |
if (isa(FirstNonPHI) || |
5892 |
if (isa(FirstNonPHI) || |
| 5893 |
isa(FirstNonPHI)) |
5893 |
isa(FirstNonPHI)) |
| 5894 |
for (BasicBlock *PredBB : PN->blocks()) |
5894 |
for (BasicBlock *PredBB : PN->blocks()) |
| 5895 |
if (isa(PredBB->getFirstNonPHI())) |
5895 |
if (isa(PredBB->getFirstNonPHI())) |
| 5896 |
return; |
5896 |
return; |
| 5897 |
} |
5897 |
} |
| 5898 |
} |
5898 |
} |
| 5899 |
|
5899 |
|
| 5900 |
LLVM_DEBUG(dbgs() << "\nLSR on loop "; |
5900 |
LLVM_DEBUG(dbgs() << "\nLSR on loop "; |
| 5901 |
L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); |
5901 |
L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); |
| 5902 |
dbgs() << ":\n"); |
5902 |
dbgs() << ":\n"); |
| 5903 |
|
5903 |
|
| 5904 |
// Configure SCEVExpander already now, so the correct mode is used for |
5904 |
// Configure SCEVExpander already now, so the correct mode is used for |
| 5905 |
// isSafeToExpand() checks. |
5905 |
// isSafeToExpand() checks. |
| 5906 |
#ifndef NDEBUG |
5906 |
#ifndef NDEBUG |
| 5907 |
Rewriter.setDebugType(DEBUG_TYPE); |
5907 |
Rewriter.setDebugType(DEBUG_TYPE); |
| 5908 |
#endif |
5908 |
#endif |
| 5909 |
Rewriter.disableCanonicalMode(); |
5909 |
Rewriter.disableCanonicalMode(); |
| 5910 |
Rewriter.enableLSRMode(); |
5910 |
Rewriter.enableLSRMode(); |
| 5911 |
|
5911 |
|
| 5912 |
// First, perform some low-level loop optimizations. |
5912 |
// First, perform some low-level loop optimizations. |
| 5913 |
OptimizeShadowIV(); |
5913 |
OptimizeShadowIV(); |
| 5914 |
OptimizeLoopTermCond(); |
5914 |
OptimizeLoopTermCond(); |
| 5915 |
|
5915 |
|
| 5916 |
// If loop preparation eliminates all interesting IV users, bail. |
5916 |
// If loop preparation eliminates all interesting IV users, bail. |
| 5917 |
if (IU.empty()) return; |
5917 |
if (IU.empty()) return; |
| 5918 |
|
5918 |
|
| 5919 |
// Skip nested loops until we can model them better with formulae. |
5919 |
// Skip nested loops until we can model them better with formulae. |
| 5920 |
if (!L->isInnermost()) { |
5920 |
if (!L->isInnermost()) { |
| 5921 |
LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); |
5921 |
LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); |
| 5922 |
return; |
5922 |
return; |
| 5923 |
} |
5923 |
} |
| 5924 |
|
5924 |
|
| 5925 |
// Start collecting data and preparing for the solver. |
5925 |
// Start collecting data and preparing for the solver. |
| 5926 |
// If number of registers is not the major cost, we cannot benefit from the |
5926 |
// If number of registers is not the major cost, we cannot benefit from the |
| 5927 |
// current profitable chain optimization which is based on number of |
5927 |
// current profitable chain optimization which is based on number of |
| 5928 |
// registers. |
5928 |
// registers. |
| 5929 |
// FIXME: add profitable chain optimization for other kinds major cost, for |
5929 |
// FIXME: add profitable chain optimization for other kinds major cost, for |
| 5930 |
// example number of instructions. |
5930 |
// example number of instructions. |
| 5931 |
if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain) |
5931 |
if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain) |
| 5932 |
CollectChains(); |
5932 |
CollectChains(); |
| 5933 |
CollectInterestingTypesAndFactors(); |
5933 |
CollectInterestingTypesAndFactors(); |
| 5934 |
CollectFixupsAndInitialFormulae(); |
5934 |
CollectFixupsAndInitialFormulae(); |
| 5935 |
CollectLoopInvariantFixupsAndFormulae(); |
5935 |
CollectLoopInvariantFixupsAndFormulae(); |
| 5936 |
|
5936 |
|
| 5937 |
if (Uses.empty()) |
5937 |
if (Uses.empty()) |
| 5938 |
return; |
5938 |
return; |
| 5939 |
|
5939 |
|
| 5940 |
LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n"; |
5940 |
LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n"; |
| 5941 |
print_uses(dbgs())); |
5941 |
print_uses(dbgs())); |
| 5942 |
|
5942 |
|
| 5943 |
// Now use the reuse data to generate a bunch of interesting ways |
5943 |
// Now use the reuse data to generate a bunch of interesting ways |
| 5944 |
// to formulate the values needed for the uses. |
5944 |
// to formulate the values needed for the uses. |
| 5945 |
GenerateAllReuseFormulae(); |
5945 |
GenerateAllReuseFormulae(); |
| 5946 |
|
5946 |
|
| 5947 |
FilterOutUndesirableDedicatedRegisters(); |
5947 |
FilterOutUndesirableDedicatedRegisters(); |
| 5948 |
NarrowSearchSpaceUsingHeuristics(); |
5948 |
NarrowSearchSpaceUsingHeuristics(); |
| 5949 |
|
5949 |
|
| 5950 |
SmallVector Solution; |
5950 |
SmallVector Solution; |
| 5951 |
Solve(Solution); |
5951 |
Solve(Solution); |
| 5952 |
|
5952 |
|
| 5953 |
// Release memory that is no longer needed. |
5953 |
// Release memory that is no longer needed. |
| 5954 |
Factors.clear(); |
5954 |
Factors.clear(); |
| 5955 |
Types.clear(); |
5955 |
Types.clear(); |
| 5956 |
RegUses.clear(); |
5956 |
RegUses.clear(); |
| 5957 |
|
5957 |
|
| 5958 |
if (Solution.empty()) |
5958 |
if (Solution.empty()) |
| 5959 |
return; |
5959 |
return; |
| 5960 |
|
5960 |
|
| 5961 |
#ifndef NDEBUG |
5961 |
#ifndef NDEBUG |
| 5962 |
// Formulae should be legal. |
5962 |
// Formulae should be legal. |
| 5963 |
for (const LSRUse &LU : Uses) { |
5963 |
for (const LSRUse &LU : Uses) { |
| 5964 |
for (const Formula &F : LU.Formulae) |
5964 |
for (const Formula &F : LU.Formulae) |
| 5965 |
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, |
5965 |
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, |
| 5966 |
F) && "Illegal formula generated!"); |
5966 |
F) && "Illegal formula generated!"); |
| 5967 |
}; |
5967 |
}; |
| 5968 |
#endif |
5968 |
#endif |
| 5969 |
|
5969 |
|
| 5970 |
// Now that we've decided what we want, make it so. |
5970 |
// Now that we've decided what we want, make it so. |
| 5971 |
ImplementSolution(Solution); |
5971 |
ImplementSolution(Solution); |
| 5972 |
} |
5972 |
} |
| 5973 |
|
5973 |
|
| 5974 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
5974 |
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 5975 |
void LSRInstance::print_factors_and_types(raw_ostream &OS) const { |
5975 |
void LSRInstance::print_factors_and_types(raw_ostream &OS) const { |
| 5976 |
if (Factors.empty() && Types.empty()) return; |
5976 |
if (Factors.empty() && Types.empty()) return; |
| 5977 |
|
5977 |
|
| 5978 |
OS << "LSR has identified the following interesting factors and types: "; |
5978 |
OS << "LSR has identified the following interesting factors and types: "; |
| 5979 |
bool First = true; |
5979 |
bool First = true; |
| 5980 |
|
5980 |
|
| 5981 |
for (int64_t Factor : Factors) { |
5981 |
for (int64_t Factor : Factors) { |
| 5982 |
if (!First) OS << ", "; |
5982 |
if (!First) OS << ", "; |
| 5983 |
First = false; |
5983 |
First = false; |
| 5984 |
OS << '*' << Factor; |
5984 |
OS << '*' << Factor; |
| 5985 |
} |
5985 |
} |
| 5986 |
|
5986 |
|
| 5987 |
for (Type *Ty : Types) { |
5987 |
for (Type *Ty : Types) { |
| 5988 |
if (!First) OS << ", "; |
5988 |
if (!First) OS << ", "; |
| 5989 |
First = false; |
5989 |
First = false; |
| 5990 |
OS << '(' << *Ty << ')'; |
5990 |
OS << '(' << *Ty << ')'; |
| 5991 |
} |
5991 |
} |
| 5992 |
OS << '\n'; |
5992 |
OS << '\n'; |
| 5993 |
} |
5993 |
} |
| 5994 |
|
5994 |
|
| 5995 |
void LSRInstance::print_fixups(raw_ostream &OS) const { |
5995 |
void LSRInstance::print_fixups(raw_ostream &OS) const { |
| 5996 |
OS << "LSR is examining the following fixup sites:\n"; |
5996 |
OS << "LSR is examining the following fixup sites:\n"; |
| 5997 |
for (const LSRUse &LU : Uses) |
5997 |
for (const LSRUse &LU : Uses) |
| 5998 |
for (const LSRFixup &LF : LU.Fixups) { |
5998 |
for (const LSRFixup &LF : LU.Fixups) { |
| 5999 |
dbgs() << " "; |
5999 |
dbgs() << " "; |
| 6000 |
LF.print(OS); |
6000 |
LF.print(OS); |
| 6001 |
OS << '\n'; |
6001 |
OS << '\n'; |
| 6002 |
} |
6002 |
} |
| 6003 |
} |
6003 |
} |
| 6004 |
|
6004 |
|
| 6005 |
void LSRInstance::print_uses(raw_ostream &OS) const { |
6005 |
void LSRInstance::print_uses(raw_ostream &OS) const { |
| 6006 |
OS << "LSR is examining the following uses:\n"; |
6006 |
OS << "LSR is examining the following uses:\n"; |
| 6007 |
for (const LSRUse &LU : Uses) { |
6007 |
for (const LSRUse &LU : Uses) { |
| 6008 |
dbgs() << " "; |
6008 |
dbgs() << " "; |
| 6009 |
LU.print(OS); |
6009 |
LU.print(OS); |
| 6010 |
OS << '\n'; |
6010 |
OS << '\n'; |
| 6011 |
for (const Formula &F : LU.Formulae) { |
6011 |
for (const Formula &F : LU.Formulae) { |
| 6012 |
OS << " "; |
6012 |
OS << " "; |
| 6013 |
F.print(OS); |
6013 |
F.print(OS); |
| 6014 |
OS << '\n'; |
6014 |
OS << '\n'; |
| 6015 |
} |
6015 |
} |
| 6016 |
} |
6016 |
} |
| 6017 |
} |
6017 |
} |
| 6018 |
|
6018 |
|
| 6019 |
void LSRInstance::print(raw_ostream &OS) const { |
6019 |
void LSRInstance::print(raw_ostream &OS) const { |
| 6020 |
print_factors_and_types(OS); |
6020 |
print_factors_and_types(OS); |
| 6021 |
print_fixups(OS); |
6021 |
print_fixups(OS); |
| 6022 |
print_uses(OS); |
6022 |
print_uses(OS); |
| 6023 |
} |
6023 |
} |
| 6024 |
|
6024 |
|
| 6025 |
LLVM_DUMP_METHOD void LSRInstance::dump() const { |
6025 |
LLVM_DUMP_METHOD void LSRInstance::dump() const { |
| 6026 |
print(errs()); errs() << '\n'; |
6026 |
print(errs()); errs() << '\n'; |
| 6027 |
} |
6027 |
} |
| 6028 |
#endif |
6028 |
#endif |
| 6029 |
|
6029 |
|
| 6030 |
namespace { |
6030 |
namespace { |
| 6031 |
|
6031 |
|
| 6032 |
class LoopStrengthReduce : public LoopPass { |
6032 |
class LoopStrengthReduce : public LoopPass { |
| 6033 |
public: |
6033 |
public: |
| 6034 |
static char ID; // Pass ID, replacement for typeid |
6034 |
static char ID; // Pass ID, replacement for typeid |
| 6035 |
|
6035 |
|
| 6036 |
LoopStrengthReduce(); |
6036 |
LoopStrengthReduce(); |
| 6037 |
|
6037 |
|
| 6038 |
private: |
6038 |
private: |
| 6039 |
bool runOnLoop(Loop *L, LPPassManager &LPM) override; |
6039 |
bool runOnLoop(Loop *L, LPPassManager &LPM) override; |
| 6040 |
void getAnalysisUsage(AnalysisUsage &AU) const override; |
6040 |
void getAnalysisUsage(AnalysisUsage &AU) const override; |
| 6041 |
}; |
6041 |
}; |
| 6042 |
|
6042 |
|
| 6043 |
} // end anonymous namespace |
6043 |
} // end anonymous namespace |
| 6044 |
|
6044 |
|
| 6045 |
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { |
6045 |
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { |
| 6046 |
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); |
6046 |
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); |
| 6047 |
} |
6047 |
} |
| 6048 |
|
6048 |
|
| 6049 |
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { |
6049 |
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { |
| 6050 |
// We split critical edges, so we change the CFG. However, we do update |
6050 |
// We split critical edges, so we change the CFG. However, we do update |
| 6051 |
// many analyses if they are around. |
6051 |
// many analyses if they are around. |
| 6052 |
AU.addPreservedID(LoopSimplifyID); |
6052 |
AU.addPreservedID(LoopSimplifyID); |
| 6053 |
|
6053 |
|
| 6054 |
AU.addRequired(); |
6054 |
AU.addRequired(); |
| 6055 |
AU.addPreserved(); |
6055 |
AU.addPreserved(); |
| 6056 |
AU.addRequiredID(LoopSimplifyID); |
6056 |
AU.addRequiredID(LoopSimplifyID); |
| 6057 |
AU.addRequired(); |
6057 |
AU.addRequired(); |
| 6058 |
AU.addPreserved(); |
6058 |
AU.addPreserved(); |
| 6059 |
AU.addRequired(); |
6059 |
AU.addRequired(); |
| 6060 |
AU.addPreserved(); |
6060 |
AU.addPreserved(); |
| 6061 |
AU.addRequired(); |
6061 |
AU.addRequired(); |
| 6062 |
AU.addRequired(); |
6062 |
AU.addRequired(); |
| 6063 |
// Requiring LoopSimplify a second time here prevents IVUsers from running |
6063 |
// Requiring LoopSimplify a second time here prevents IVUsers from running |
| 6064 |
// twice, since LoopSimplify was invalidated by running ScalarEvolution. |
6064 |
// twice, since LoopSimplify was invalidated by running ScalarEvolution. |
| 6065 |
AU.addRequiredID(LoopSimplifyID); |
6065 |
AU.addRequiredID(LoopSimplifyID); |
| 6066 |
AU.addRequired(); |
6066 |
AU.addRequired(); |
| 6067 |
AU.addPreserved(); |
6067 |
AU.addPreserved(); |
| 6068 |
AU.addRequired(); |
6068 |
AU.addRequired(); |
| 6069 |
AU.addPreserved(); |
6069 |
AU.addPreserved(); |
| 6070 |
} |
6070 |
} |
| 6071 |
|
6071 |
|
| 6072 |
namespace { |
6072 |
namespace { |
| 6073 |
|
6073 |
|
| 6074 |
/// Enables more convenient iteration over a DWARF expression vector. |
6074 |
/// Enables more convenient iteration over a DWARF expression vector. |
| 6075 |
static iterator_range |
6075 |
static iterator_range |
| 6076 |
ToDwarfOpIter(SmallVectorImpl &Expr) { |
6076 |
ToDwarfOpIter(SmallVectorImpl &Expr) { |
| 6077 |
llvm::DIExpression::expr_op_iterator Begin = |
6077 |
llvm::DIExpression::expr_op_iterator Begin = |
| 6078 |
llvm::DIExpression::expr_op_iterator(Expr.begin()); |
6078 |
llvm::DIExpression::expr_op_iterator(Expr.begin()); |
| 6079 |
llvm::DIExpression::expr_op_iterator End = |
6079 |
llvm::DIExpression::expr_op_iterator End = |
| 6080 |
llvm::DIExpression::expr_op_iterator(Expr.end()); |
6080 |
llvm::DIExpression::expr_op_iterator(Expr.end()); |
| 6081 |
return {Begin, End}; |
6081 |
return {Begin, End}; |
| 6082 |
} |
6082 |
} |
| 6083 |
|
6083 |
|
| 6084 |
struct SCEVDbgValueBuilder { |
6084 |
struct SCEVDbgValueBuilder { |
| 6085 |
SCEVDbgValueBuilder() = default; |
6085 |
SCEVDbgValueBuilder() = default; |
| 6086 |
SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); } |
6086 |
SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); } |
| 6087 |
|
6087 |
|
| 6088 |
void clone(const SCEVDbgValueBuilder &Base) { |
6088 |
void clone(const SCEVDbgValueBuilder &Base) { |
| 6089 |
LocationOps = Base.LocationOps; |
6089 |
LocationOps = Base.LocationOps; |
| 6090 |
Expr = Base.Expr; |
6090 |
Expr = Base.Expr; |
| 6091 |
} |
6091 |
} |
| 6092 |
|
6092 |
|
| 6093 |
void clear() { |
6093 |
void clear() { |
| 6094 |
LocationOps.clear(); |
6094 |
LocationOps.clear(); |
| 6095 |
Expr.clear(); |
6095 |
Expr.clear(); |
| 6096 |
} |
6096 |
} |
| 6097 |
|
6097 |
|
| 6098 |
/// The DIExpression as we translate the SCEV. |
6098 |
/// The DIExpression as we translate the SCEV. |
| 6099 |
SmallVector Expr; |
6099 |
SmallVector Expr; |
| 6100 |
/// The location ops of the DIExpression. |
6100 |
/// The location ops of the DIExpression. |
| 6101 |
SmallVector LocationOps; |
6101 |
SmallVector LocationOps; |
| 6102 |
|
6102 |
|
| 6103 |
void pushOperator(uint64_t Op) { Expr.push_back(Op); } |
6103 |
void pushOperator(uint64_t Op) { Expr.push_back(Op); } |
| 6104 |
void pushUInt(uint64_t Operand) { Expr.push_back(Operand); } |
6104 |
void pushUInt(uint64_t Operand) { Expr.push_back(Operand); } |
| 6105 |
|
6105 |
|
| 6106 |
/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value |
6106 |
/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value |
| 6107 |
/// in the set of values referenced by the expression. |
6107 |
/// in the set of values referenced by the expression. |
| 6108 |
void pushLocation(llvm::Value *V) { |
6108 |
void pushLocation(llvm::Value *V) { |
| 6109 |
Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg); |
6109 |
Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg); |
| 6110 |
auto *It = llvm::find(LocationOps, V); |
6110 |
auto *It = llvm::find(LocationOps, V); |
| 6111 |
unsigned ArgIndex = 0; |
6111 |
unsigned ArgIndex = 0; |
| 6112 |
if (It != LocationOps.end()) { |
6112 |
if (It != LocationOps.end()) { |
| 6113 |
ArgIndex = std::distance(LocationOps.begin(), It); |
6113 |
ArgIndex = std::distance(LocationOps.begin(), It); |
| 6114 |
} else { |
6114 |
} else { |
| 6115 |
ArgIndex = LocationOps.size(); |
6115 |
ArgIndex = LocationOps.size(); |
| 6116 |
LocationOps.push_back(V); |
6116 |
LocationOps.push_back(V); |
| 6117 |
} |
6117 |
} |
| 6118 |
Expr.push_back(ArgIndex); |
6118 |
Expr.push_back(ArgIndex); |
| 6119 |
} |
6119 |
} |
| 6120 |
|
6120 |
|
| 6121 |
void pushValue(const SCEVUnknown *U) { |
6121 |
void pushValue(const SCEVUnknown *U) { |
| 6122 |
llvm::Value *V = cast(U)->getValue(); |
6122 |
llvm::Value *V = cast(U)->getValue(); |
| 6123 |
pushLocation(V); |
6123 |
pushLocation(V); |
| 6124 |
} |
6124 |
} |
| 6125 |
|
6125 |
|
| 6126 |
bool pushConst(const SCEVConstant *C) { |
6126 |
bool pushConst(const SCEVConstant *C) { |
| 6127 |
if (C->getAPInt().getSignificantBits() > 64) |
6127 |
if (C->getAPInt().getSignificantBits() > 64) |
| 6128 |
return false; |
6128 |
return false; |
| 6129 |
Expr.push_back(llvm::dwarf::DW_OP_consts); |
6129 |
Expr.push_back(llvm::dwarf::DW_OP_consts); |
| 6130 |
Expr.push_back(C->getAPInt().getSExtValue()); |
6130 |
Expr.push_back(C->getAPInt().getSExtValue()); |
| 6131 |
return true; |
6131 |
return true; |
| 6132 |
} |
6132 |
} |
| 6133 |
|
6133 |
|
| 6134 |
// Iterating the expression as DWARF ops is convenient when updating |
6134 |
// Iterating the expression as DWARF ops is convenient when updating |
| 6135 |
// DWARF_OP_LLVM_args. |
6135 |
// DWARF_OP_LLVM_args. |
| 6136 |
iterator_range expr_ops() { |
6136 |
iterator_range expr_ops() { |
| 6137 |
return ToDwarfOpIter(Expr); |
6137 |
return ToDwarfOpIter(Expr); |
| 6138 |
} |
6138 |
} |
| 6139 |
|
6139 |
|
| 6140 |
/// Several SCEV types are sequences of the same arithmetic operator applied |
6140 |
/// Several SCEV types are sequences of the same arithmetic operator applied |
| 6141 |
/// to constants and values that may be extended or truncated. |
6141 |
/// to constants and values that may be extended or truncated. |
| 6142 |
bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr, |
6142 |
bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr, |
| 6143 |
uint64_t DwarfOp) { |
6143 |
uint64_t DwarfOp) { |
| 6144 |
assert((isa(CommExpr) || isa(CommExpr)) && |
6144 |
assert((isa(CommExpr) || isa(CommExpr)) && |
| 6145 |
"Expected arithmetic SCEV type"); |
6145 |
"Expected arithmetic SCEV type"); |
| 6146 |
bool Success = true; |
6146 |
bool Success = true; |
| 6147 |
unsigned EmitOperator = 0; |
6147 |
unsigned EmitOperator = 0; |
| 6148 |
for (const auto &Op : CommExpr->operands()) { |
6148 |
for (const auto &Op : CommExpr->operands()) { |
| 6149 |
Success &= pushSCEV(Op); |
6149 |
Success &= pushSCEV(Op); |
| 6150 |
|
6150 |
|
| 6151 |
if (EmitOperator >= 1) |
6151 |
if (EmitOperator >= 1) |
| 6152 |
pushOperator(DwarfOp); |
6152 |
pushOperator(DwarfOp); |
| 6153 |
++EmitOperator; |
6153 |
++EmitOperator; |
| 6154 |
} |
6154 |
} |
| 6155 |
return Success; |
6155 |
return Success; |
| 6156 |
} |
6156 |
} |
| 6157 |
|
6157 |
|
| 6158 |
// TODO: Identify and omit noop casts. |
6158 |
// TODO: Identify and omit noop casts. |
| 6159 |
bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) { |
6159 |
bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) { |
| 6160 |
const llvm::SCEV *Inner = C->getOperand(0); |
6160 |
const llvm::SCEV *Inner = C->getOperand(0); |
| 6161 |
const llvm::Type *Type = C->getType(); |
6161 |
const llvm::Type *Type = C->getType(); |
| 6162 |
uint64_t ToWidth = Type->getIntegerBitWidth(); |
6162 |
uint64_t ToWidth = Type->getIntegerBitWidth(); |
| 6163 |
bool Success = pushSCEV(Inner); |
6163 |
bool Success = pushSCEV(Inner); |
| 6164 |
uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth, |
6164 |
uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth, |
| 6165 |
IsSigned ? llvm::dwarf::DW_ATE_signed |
6165 |
IsSigned ? llvm::dwarf::DW_ATE_signed |
| 6166 |
: llvm::dwarf::DW_ATE_unsigned}; |
6166 |
: llvm::dwarf::DW_ATE_unsigned}; |
| 6167 |
for (const auto &Op : CastOps) |
6167 |
for (const auto &Op : CastOps) |
| 6168 |
pushOperator(Op); |
6168 |
pushOperator(Op); |
| 6169 |
return Success; |
6169 |
return Success; |
| 6170 |
} |
6170 |
} |
| 6171 |
|
6171 |
|
| 6172 |
// TODO: MinMax - although these haven't been encountered in the test suite. |
6172 |
// TODO: MinMax - although these haven't been encountered in the test suite. |
| 6173 |
bool pushSCEV(const llvm::SCEV *S) { |
6173 |
bool pushSCEV(const llvm::SCEV *S) { |
| 6174 |
bool Success = true; |
6174 |
bool Success = true; |
| 6175 |
if (const SCEVConstant *StartInt = dyn_cast(S)) { |
6175 |
if (const SCEVConstant *StartInt = dyn_cast(S)) { |
| 6176 |
Success &= pushConst(StartInt); |
6176 |
Success &= pushConst(StartInt); |
| 6177 |
|
6177 |
|
| 6178 |
} else if (const SCEVUnknown *U = dyn_cast(S)) { |
6178 |
} else if (const SCEVUnknown *U = dyn_cast(S)) { |
| 6179 |
if (!U->getValue()) |
6179 |
if (!U->getValue()) |
| 6180 |
return false; |
6180 |
return false; |
| 6181 |
pushLocation(U->getValue()); |
6181 |
pushLocation(U->getValue()); |
| 6182 |
|
6182 |
|
| 6183 |
} else if (const SCEVMulExpr *MulRec = dyn_cast(S)) { |
6183 |
} else if (const SCEVMulExpr *MulRec = dyn_cast(S)) { |
| 6184 |
Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul); |
6184 |
Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul); |
| 6185 |
|
6185 |
|
| 6186 |
} else if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { |
6186 |
} else if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { |
| 6187 |
Success &= pushSCEV(UDiv->getLHS()); |
6187 |
Success &= pushSCEV(UDiv->getLHS()); |
| 6188 |
Success &= pushSCEV(UDiv->getRHS()); |
6188 |
Success &= pushSCEV(UDiv->getRHS()); |
| 6189 |
pushOperator(llvm::dwarf::DW_OP_div); |
6189 |
pushOperator(llvm::dwarf::DW_OP_div); |
| 6190 |
|
6190 |
|
| 6191 |
} else if (const SCEVCastExpr *Cast = dyn_cast(S)) { |
6191 |
} else if (const SCEVCastExpr *Cast = dyn_cast(S)) { |
| 6192 |
// Assert if a new and unknown SCEVCastEXpr type is encountered. |
6192 |
// Assert if a new and unknown SCEVCastEXpr type is encountered. |
| 6193 |
assert((isa(Cast) || isa(Cast) || |
6193 |
assert((isa(Cast) || isa(Cast) || |
| 6194 |
isa(Cast) || isa(Cast)) && |
6194 |
isa(Cast) || isa(Cast)) && |
| 6195 |
"Unexpected cast type in SCEV."); |
6195 |
"Unexpected cast type in SCEV."); |
| 6196 |
Success &= pushCast(Cast, (isa(Cast))); |
6196 |
Success &= pushCast(Cast, (isa(Cast))); |
| 6197 |
|
6197 |
|
| 6198 |
} else if (const SCEVAddExpr *AddExpr = dyn_cast(S)) { |
6198 |
} else if (const SCEVAddExpr *AddExpr = dyn_cast(S)) { |
| 6199 |
Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus); |
6199 |
Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus); |
| 6200 |
|
6200 |
|
| 6201 |
} else if (isa(S)) { |
6201 |
} else if (isa(S)) { |
| 6202 |
// Nested SCEVAddRecExpr are generated by nested loops and are currently |
6202 |
// Nested SCEVAddRecExpr are generated by nested loops and are currently |
| 6203 |
// unsupported. |
6203 |
// unsupported. |
| 6204 |
return false; |
6204 |
return false; |
| 6205 |
|
6205 |
|
| 6206 |
} else { |
6206 |
} else { |
| 6207 |
return false; |
6207 |
return false; |
| 6208 |
} |
6208 |
} |
| 6209 |
return Success; |
6209 |
return Success; |
| 6210 |
} |
6210 |
} |
| 6211 |
|
6211 |
|
| 6212 |
/// Return true if the combination of arithmetic operator and underlying |
6212 |
/// Return true if the combination of arithmetic operator and underlying |
| 6213 |
/// SCEV constant value is an identity function. |
6213 |
/// SCEV constant value is an identity function. |
| 6214 |
bool isIdentityFunction(uint64_t Op, const SCEV *S) { |
6214 |
bool isIdentityFunction(uint64_t Op, const SCEV *S) { |
| 6215 |
if (const SCEVConstant *C = dyn_cast(S)) { |
6215 |
if (const SCEVConstant *C = dyn_cast(S)) { |
| 6216 |
if (C->getAPInt().getSignificantBits() > 64) |
6216 |
if (C->getAPInt().getSignificantBits() > 64) |
| 6217 |
return false; |
6217 |
return false; |
| 6218 |
int64_t I = C->getAPInt().getSExtValue(); |
6218 |
int64_t I = C->getAPInt().getSExtValue(); |
| 6219 |
switch (Op) { |
6219 |
switch (Op) { |
| 6220 |
case llvm::dwarf::DW_OP_plus: |
6220 |
case llvm::dwarf::DW_OP_plus: |
| 6221 |
case llvm::dwarf::DW_OP_minus: |
6221 |
case llvm::dwarf::DW_OP_minus: |
| 6222 |
return I == 0; |
6222 |
return I == 0; |
| 6223 |
case llvm::dwarf::DW_OP_mul: |
6223 |
case llvm::dwarf::DW_OP_mul: |
| 6224 |
case llvm::dwarf::DW_OP_div: |
6224 |
case llvm::dwarf::DW_OP_div: |
| 6225 |
return I == 1; |
6225 |
return I == 1; |
| 6226 |
} |
6226 |
} |
| 6227 |
} |
6227 |
} |
| 6228 |
return false; |
6228 |
return false; |
| 6229 |
} |
6229 |
} |
| 6230 |
|
6230 |
|
| 6231 |
/// Convert a SCEV of a value to a DIExpression that is pushed onto the |
6231 |
/// Convert a SCEV of a value to a DIExpression that is pushed onto the |
| 6232 |
/// builder's expression stack. The stack should already contain an |
6232 |
/// builder's expression stack. The stack should already contain an |
| 6233 |
/// expression for the iteration count, so that it can be multiplied by |
6233 |
/// expression for the iteration count, so that it can be multiplied by |
| 6234 |
/// the stride and added to the start. |
6234 |
/// the stride and added to the start. |
| 6235 |
/// Components of the expression are omitted if they are an identity function. |
6235 |
/// Components of the expression are omitted if they are an identity function. |
| 6236 |
/// Chain (non-affine) SCEVs are not supported. |
6236 |
/// Chain (non-affine) SCEVs are not supported. |
| 6237 |
bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) { |
6237 |
bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) { |
| 6238 |
assert(SAR.isAffine() && "Expected affine SCEV"); |
6238 |
assert(SAR.isAffine() && "Expected affine SCEV"); |
| 6239 |
// TODO: Is this check needed? |
6239 |
// TODO: Is this check needed? |
| 6240 |
if (isa(SAR.getStart())) |
6240 |
if (isa(SAR.getStart())) |
| 6241 |
return false; |
6241 |
return false; |
| 6242 |
|
6242 |
|
| 6243 |
const SCEV *Start = SAR.getStart(); |
6243 |
const SCEV *Start = SAR.getStart(); |
| 6244 |
const SCEV *Stride = SAR.getStepRecurrence(SE); |
6244 |
const SCEV *Stride = SAR.getStepRecurrence(SE); |
| 6245 |
|
6245 |
|
| 6246 |
// Skip pushing arithmetic noops. |
6246 |
// Skip pushing arithmetic noops. |
| 6247 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) { |
6247 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) { |
| 6248 |
if (!pushSCEV(Stride)) |
6248 |
if (!pushSCEV(Stride)) |
| 6249 |
return false; |
6249 |
return false; |
| 6250 |
pushOperator(llvm::dwarf::DW_OP_mul); |
6250 |
pushOperator(llvm::dwarf::DW_OP_mul); |
| 6251 |
} |
6251 |
} |
| 6252 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) { |
6252 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) { |
| 6253 |
if (!pushSCEV(Start)) |
6253 |
if (!pushSCEV(Start)) |
| 6254 |
return false; |
6254 |
return false; |
| 6255 |
pushOperator(llvm::dwarf::DW_OP_plus); |
6255 |
pushOperator(llvm::dwarf::DW_OP_plus); |
| 6256 |
} |
6256 |
} |
| 6257 |
return true; |
6257 |
return true; |
| 6258 |
} |
6258 |
} |
| 6259 |
|
6259 |
|
| 6260 |
/// Create an expression that is an offset from a value (usually the IV). |
6260 |
/// Create an expression that is an offset from a value (usually the IV). |
| 6261 |
void createOffsetExpr(int64_t Offset, Value *OffsetValue) { |
6261 |
void createOffsetExpr(int64_t Offset, Value *OffsetValue) { |
| 6262 |
pushLocation(OffsetValue); |
6262 |
pushLocation(OffsetValue); |
| 6263 |
DIExpression::appendOffset(Expr, Offset); |
6263 |
DIExpression::appendOffset(Expr, Offset); |
| 6264 |
LLVM_DEBUG( |
6264 |
LLVM_DEBUG( |
| 6265 |
dbgs() << "scev-salvage: Generated IV offset expression. Offset: " |
6265 |
dbgs() << "scev-salvage: Generated IV offset expression. Offset: " |
| 6266 |
<< std::to_string(Offset) << "\n"); |
6266 |
<< std::to_string(Offset) << "\n"); |
| 6267 |
} |
6267 |
} |
| 6268 |
|
6268 |
|
| 6269 |
/// Combine a translation of the SCEV and the IV to create an expression that |
6269 |
/// Combine a translation of the SCEV and the IV to create an expression that |
| 6270 |
/// recovers a location's value. |
6270 |
/// recovers a location's value. |
| 6271 |
/// returns true if an expression was created. |
6271 |
/// returns true if an expression was created. |
| 6272 |
bool createIterCountExpr(const SCEV *S, |
6272 |
bool createIterCountExpr(const SCEV *S, |
| 6273 |
const SCEVDbgValueBuilder &IterationCount, |
6273 |
const SCEVDbgValueBuilder &IterationCount, |
| 6274 |
ScalarEvolution &SE) { |
6274 |
ScalarEvolution &SE) { |
| 6275 |
// SCEVs for SSA values are most frquently of the form |
6275 |
// SCEVs for SSA values are most frquently of the form |
| 6276 |
// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). |
6276 |
// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..). |
| 6277 |
// This is because %a is a PHI node that is not the IV. However, these |
6277 |
// This is because %a is a PHI node that is not the IV. However, these |
| 6278 |
// SCEVs have not been observed to result in debuginfo-lossy optimisations, |
6278 |
// SCEVs have not been observed to result in debuginfo-lossy optimisations, |
| 6279 |
// so its not expected this point will be reached. |
6279 |
// so its not expected this point will be reached. |
| 6280 |
if (!isa(S)) |
6280 |
if (!isa(S)) |
| 6281 |
return false; |
6281 |
return false; |
| 6282 |
|
6282 |
|
| 6283 |
LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S |
6283 |
LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S |
| 6284 |
<< '\n'); |
6284 |
<< '\n'); |
| 6285 |
|
6285 |
|
| 6286 |
const auto *Rec = cast(S); |
6286 |
const auto *Rec = cast(S); |
| 6287 |
if (!Rec->isAffine()) |
6287 |
if (!Rec->isAffine()) |
| 6288 |
return false; |
6288 |
return false; |
| 6289 |
|
6289 |
|
| 6290 |
if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize) |
6290 |
if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize) |
| 6291 |
return false; |
6291 |
return false; |
| 6292 |
|
6292 |
|
| 6293 |
// Initialise a new builder with the iteration count expression. In |
6293 |
// Initialise a new builder with the iteration count expression. In |
| 6294 |
// combination with the value's SCEV this enables recovery. |
6294 |
// combination with the value's SCEV this enables recovery. |
| 6295 |
clone(IterationCount); |
6295 |
clone(IterationCount); |
| 6296 |
if (!SCEVToValueExpr(*Rec, SE)) |
6296 |
if (!SCEVToValueExpr(*Rec, SE)) |
| 6297 |
return false; |
6297 |
return false; |
| 6298 |
|
6298 |
|
| 6299 |
return true; |
6299 |
return true; |
| 6300 |
} |
6300 |
} |
| 6301 |
|
6301 |
|
| 6302 |
/// Convert a SCEV of a value to a DIExpression that is pushed onto the |
6302 |
/// Convert a SCEV of a value to a DIExpression that is pushed onto the |
| 6303 |
/// builder's expression stack. The stack should already contain an |
6303 |
/// builder's expression stack. The stack should already contain an |
| 6304 |
/// expression for the iteration count, so that it can be multiplied by |
6304 |
/// expression for the iteration count, so that it can be multiplied by |
| 6305 |
/// the stride and added to the start. |
6305 |
/// the stride and added to the start. |
| 6306 |
/// Components of the expression are omitted if they are an identity function. |
6306 |
/// Components of the expression are omitted if they are an identity function. |
| 6307 |
bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR, |
6307 |
bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR, |
| 6308 |
ScalarEvolution &SE) { |
6308 |
ScalarEvolution &SE) { |
| 6309 |
assert(SAR.isAffine() && "Expected affine SCEV"); |
6309 |
assert(SAR.isAffine() && "Expected affine SCEV"); |
| 6310 |
if (isa(SAR.getStart())) { |
6310 |
if (isa(SAR.getStart())) { |
| 6311 |
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: " |
6311 |
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: " |
| 6312 |
<< SAR << '\n'); |
6312 |
<< SAR << '\n'); |
| 6313 |
return false; |
6313 |
return false; |
| 6314 |
} |
6314 |
} |
| 6315 |
const SCEV *Start = SAR.getStart(); |
6315 |
const SCEV *Start = SAR.getStart(); |
| 6316 |
const SCEV *Stride = SAR.getStepRecurrence(SE); |
6316 |
const SCEV *Stride = SAR.getStepRecurrence(SE); |
| 6317 |
|
6317 |
|
| 6318 |
// Skip pushing arithmetic noops. |
6318 |
// Skip pushing arithmetic noops. |
| 6319 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) { |
6319 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) { |
| 6320 |
if (!pushSCEV(Start)) |
6320 |
if (!pushSCEV(Start)) |
| 6321 |
return false; |
6321 |
return false; |
| 6322 |
pushOperator(llvm::dwarf::DW_OP_minus); |
6322 |
pushOperator(llvm::dwarf::DW_OP_minus); |
| 6323 |
} |
6323 |
} |
| 6324 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) { |
6324 |
if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) { |
| 6325 |
if (!pushSCEV(Stride)) |
6325 |
if (!pushSCEV(Stride)) |
| 6326 |
return false; |
6326 |
return false; |
| 6327 |
pushOperator(llvm::dwarf::DW_OP_div); |
6327 |
pushOperator(llvm::dwarf::DW_OP_div); |
| 6328 |
} |
6328 |
} |
| 6329 |
return true; |
6329 |
return true; |
| 6330 |
} |
6330 |
} |
| 6331 |
|
6331 |
|
| 6332 |
// Append the current expression and locations to a location list and an |
6332 |
// Append the current expression and locations to a location list and an |
| 6333 |
// expression list. Modify the DW_OP_LLVM_arg indexes to account for |
6333 |
// expression list. Modify the DW_OP_LLVM_arg indexes to account for |
| 6334 |
// the locations already present in the destination list. |
6334 |
// the locations already present in the destination list. |
| 6335 |
void appendToVectors(SmallVectorImpl &DestExpr, |
6335 |
void appendToVectors(SmallVectorImpl &DestExpr, |
| 6336 |
SmallVectorImpl &DestLocations) { |
6336 |
SmallVectorImpl &DestLocations) { |
| 6337 |
assert(!DestLocations.empty() && |
6337 |
assert(!DestLocations.empty() && |
| 6338 |
"Expected the locations vector to contain the IV"); |
6338 |
"Expected the locations vector to contain the IV"); |
| 6339 |
// The DWARF_OP_LLVM_arg arguments of the expression being appended must be |
6339 |
// The DWARF_OP_LLVM_arg arguments of the expression being appended must be |
| 6340 |
// modified to account for the locations already in the destination vector. |
6340 |
// modified to account for the locations already in the destination vector. |
| 6341 |
// All builders contain the IV as the first location op. |
6341 |
// All builders contain the IV as the first location op. |
| 6342 |
assert(!LocationOps.empty() && |
6342 |
assert(!LocationOps.empty() && |
| 6343 |
"Expected the location ops to contain the IV."); |
6343 |
"Expected the location ops to contain the IV."); |
| 6344 |
// DestIndexMap[n] contains the index in DestLocations for the nth |
6344 |
// DestIndexMap[n] contains the index in DestLocations for the nth |
| 6345 |
// location in this SCEVDbgValueBuilder. |
6345 |
// location in this SCEVDbgValueBuilder. |
| 6346 |
SmallVector DestIndexMap; |
6346 |
SmallVector DestIndexMap; |
| 6347 |
for (const auto &Op : LocationOps) { |
6347 |
for (const auto &Op : LocationOps) { |
| 6348 |
auto It = find(DestLocations, Op); |
6348 |
auto It = find(DestLocations, Op); |
| 6349 |
if (It != DestLocations.end()) { |
6349 |
if (It != DestLocations.end()) { |
| 6350 |
// Location already exists in DestLocations, reuse existing ArgIndex. |
6350 |
// Location already exists in DestLocations, reuse existing ArgIndex. |
| 6351 |
DestIndexMap.push_back(std::distance(DestLocations.begin(), It)); |
6351 |
DestIndexMap.push_back(std::distance(DestLocations.begin(), It)); |
| 6352 |
continue; |
6352 |
continue; |
| 6353 |
} |
6353 |
} |
| 6354 |
// Location is not in DestLocations, add it. |
6354 |
// Location is not in DestLocations, add it. |
| 6355 |
DestIndexMap.push_back(DestLocations.size()); |
6355 |
DestIndexMap.push_back(DestLocations.size()); |
| 6356 |
DestLocations.push_back(Op); |
6356 |
DestLocations.push_back(Op); |
| 6357 |
} |
6357 |
} |
| 6358 |
|
6358 |
|
| 6359 |
for (const auto &Op : expr_ops()) { |
6359 |
for (const auto &Op : expr_ops()) { |
| 6360 |
if (Op.getOp() != dwarf::DW_OP_LLVM_arg) { |
6360 |
if (Op.getOp() != dwarf::DW_OP_LLVM_arg) { |
| 6361 |
Op.appendToVector(DestExpr); |
6361 |
Op.appendToVector(DestExpr); |
| 6362 |
continue; |
6362 |
continue; |
| 6363 |
} |
6363 |
} |
| 6364 |
|
6364 |
|
| 6365 |
DestExpr.push_back(dwarf::DW_OP_LLVM_arg); |
6365 |
DestExpr.push_back(dwarf::DW_OP_LLVM_arg); |
| 6366 |
// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV, |
6366 |
// `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV, |
| 6367 |
// DestIndexMap[n] contains its new index in DestLocations. |
6367 |
// DestIndexMap[n] contains its new index in DestLocations. |
| 6368 |
uint64_t NewIndex = DestIndexMap[Op.getArg(0)]; |
6368 |
uint64_t NewIndex = DestIndexMap[Op.getArg(0)]; |
| 6369 |
DestExpr.push_back(NewIndex); |
6369 |
DestExpr.push_back(NewIndex); |
| 6370 |
} |
6370 |
} |
| 6371 |
} |
6371 |
} |
| 6372 |
}; |
6372 |
}; |
| 6373 |
|
6373 |
|
| 6374 |
/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs |
6374 |
/// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs |
| 6375 |
/// and DIExpression. |
6375 |
/// and DIExpression. |
| 6376 |
struct DVIRecoveryRec { |
6376 |
struct DVIRecoveryRec { |
| 6377 |
DVIRecoveryRec(DbgValueInst *DbgValue) |
6377 |
DVIRecoveryRec(DbgValueInst *DbgValue) |
| 6378 |
: DVI(DbgValue), Expr(DbgValue->getExpression()), |
6378 |
: DVI(DbgValue), Expr(DbgValue->getExpression()), |
| 6379 |
HadLocationArgList(false) {} |
6379 |
HadLocationArgList(false) {} |
| 6380 |
|
6380 |
|
| 6381 |
DbgValueInst *DVI; |
6381 |
DbgValueInst *DVI; |
| 6382 |
DIExpression *Expr; |
6382 |
DIExpression *Expr; |
| 6383 |
bool HadLocationArgList; |
6383 |
bool HadLocationArgList; |
| 6384 |
SmallVector LocationOps; |
6384 |
SmallVector LocationOps; |
| 6385 |
SmallVector SCEVs; |
6385 |
SmallVector SCEVs; |
| 6386 |
SmallVector, 2> RecoveryExprs; |
6386 |
SmallVector, 2> RecoveryExprs; |
| 6387 |
|
6387 |
|
| 6388 |
void clear() { |
6388 |
void clear() { |
| 6389 |
for (auto &RE : RecoveryExprs) |
6389 |
for (auto &RE : RecoveryExprs) |
| 6390 |
RE.reset(); |
6390 |
RE.reset(); |
| 6391 |
RecoveryExprs.clear(); |
6391 |
RecoveryExprs.clear(); |
| 6392 |
} |
6392 |
} |
| 6393 |
|
6393 |
|
| 6394 |
~DVIRecoveryRec() { clear(); } |
6394 |
~DVIRecoveryRec() { clear(); } |
| 6395 |
}; |
6395 |
}; |
| 6396 |
} // namespace |
6396 |
} // namespace |
| 6397 |
|
6397 |
|
| 6398 |
/// Returns the total number of DW_OP_llvm_arg operands in the expression. |
6398 |
/// Returns the total number of DW_OP_llvm_arg operands in the expression. |
| 6399 |
/// This helps in determining if a DIArglist is necessary or can be omitted from |
6399 |
/// This helps in determining if a DIArglist is necessary or can be omitted from |
| 6400 |
/// the dbg.value. |
6400 |
/// the dbg.value. |
| 6401 |
static unsigned numLLVMArgOps(SmallVectorImpl &Expr) { |
6401 |
static unsigned numLLVMArgOps(SmallVectorImpl &Expr) { |
| 6402 |
auto expr_ops = ToDwarfOpIter(Expr); |
6402 |
auto expr_ops = ToDwarfOpIter(Expr); |
| 6403 |
unsigned Count = 0; |
6403 |
unsigned Count = 0; |
| 6404 |
for (auto Op : expr_ops) |
6404 |
for (auto Op : expr_ops) |
| 6405 |
if (Op.getOp() == dwarf::DW_OP_LLVM_arg) |
6405 |
if (Op.getOp() == dwarf::DW_OP_LLVM_arg) |
| 6406 |
Count++; |
6406 |
Count++; |
| 6407 |
return Count; |
6407 |
return Count; |
| 6408 |
} |
6408 |
} |
| 6409 |
|
6409 |
|
| 6410 |
/// Overwrites DVI with the location and Ops as the DIExpression. This will |
6410 |
/// Overwrites DVI with the location and Ops as the DIExpression. This will |
| 6411 |
/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands, |
6411 |
/// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands, |
| 6412 |
/// because a DIArglist is not created for the first argument of the dbg.value. |
6412 |
/// because a DIArglist is not created for the first argument of the dbg.value. |
| 6413 |
static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location, |
6413 |
static void updateDVIWithLocation(DbgValueInst &DVI, Value *Location, |
| 6414 |
SmallVectorImpl &Ops) { |
6414 |
SmallVectorImpl &Ops) { |
| 6415 |
assert( |
6415 |
assert( |
| 6416 |
numLLVMArgOps(Ops) == 0 && |
6416 |
numLLVMArgOps(Ops) == 0 && |
| 6417 |
"Expected expression that does not contain any DW_OP_llvm_arg operands."); |
6417 |
"Expected expression that does not contain any DW_OP_llvm_arg operands."); |
| 6418 |
DVI.setRawLocation(ValueAsMetadata::get(Location)); |
6418 |
DVI.setRawLocation(ValueAsMetadata::get(Location)); |
| 6419 |
DVI.setExpression(DIExpression::get(DVI.getContext(), Ops)); |
6419 |
DVI.setExpression(DIExpression::get(DVI.getContext(), Ops)); |
| 6420 |
} |
6420 |
} |
| 6421 |
|
6421 |
|
| 6422 |
/// Overwrite DVI with locations placed into a DIArglist. |
6422 |
/// Overwrite DVI with locations placed into a DIArglist. |
| 6423 |
static void updateDVIWithLocations(DbgValueInst &DVI, |
6423 |
static void updateDVIWithLocations(DbgValueInst &DVI, |
| 6424 |
SmallVectorImpl &Locations, |
6424 |
SmallVectorImpl &Locations, |
| 6425 |
SmallVectorImpl &Ops) { |
6425 |
SmallVectorImpl &Ops) { |
| 6426 |
assert(numLLVMArgOps(Ops) != 0 && |
6426 |
assert(numLLVMArgOps(Ops) != 0 && |
| 6427 |
"Expected expression that references DIArglist locations using " |
6427 |
"Expected expression that references DIArglist locations using " |
| 6428 |
"DW_OP_llvm_arg operands."); |
6428 |
"DW_OP_llvm_arg operands."); |
| 6429 |
SmallVector MetadataLocs; |
6429 |
SmallVector MetadataLocs; |
| 6430 |
for (Value *V : Locations) |
6430 |
for (Value *V : Locations) |
| 6431 |
MetadataLocs.push_back(ValueAsMetadata::get(V)); |
6431 |
MetadataLocs.push_back(ValueAsMetadata::get(V)); |
| 6432 |
auto ValArrayRef = llvm::ArrayRef(MetadataLocs); |
6432 |
auto ValArrayRef = llvm::ArrayRef(MetadataLocs); |
| 6433 |
DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef)); |
6433 |
DVI.setRawLocation(llvm::DIArgList::get(DVI.getContext(), ValArrayRef)); |
| 6434 |
DVI.setExpression(DIExpression::get(DVI.getContext(), Ops)); |
6434 |
DVI.setExpression(DIExpression::get(DVI.getContext(), Ops)); |
| 6435 |
} |
6435 |
} |
| 6436 |
|
6436 |
|
| 6437 |
/// Write the new expression and new location ops for the dbg.value. If possible |
6437 |
/// Write the new expression and new location ops for the dbg.value. If possible |
| 6438 |
/// reduce the szie of the dbg.value intrinsic by omitting DIArglist. This |
6438 |
/// reduce the szie of the dbg.value intrinsic by omitting DIArglist. This |
| 6439 |
/// can be omitted if: |
6439 |
/// can be omitted if: |
| 6440 |
/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg. |
6440 |
/// 1. There is only a single location, refenced by a single DW_OP_llvm_arg. |
| 6441 |
/// 2. The DW_OP_LLVM_arg is the first operand in the expression. |
6441 |
/// 2. The DW_OP_LLVM_arg is the first operand in the expression. |
| 6442 |
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, |
6442 |
static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec, |
| 6443 |
SmallVectorImpl &NewLocationOps, |
6443 |
SmallVectorImpl &NewLocationOps, |
| 6444 |
SmallVectorImpl &NewExpr) { |
6444 |
SmallVectorImpl &NewExpr) { |
| 6445 |
unsigned NumLLVMArgs = numLLVMArgOps(NewExpr); |
6445 |
unsigned NumLLVMArgs = numLLVMArgOps(NewExpr); |
| 6446 |
if (NumLLVMArgs == 0) { |
6446 |
if (NumLLVMArgs == 0) { |
| 6447 |
// Location assumed to be on the stack. |
6447 |
// Location assumed to be on the stack. |
| 6448 |
updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr); |
6448 |
updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], NewExpr); |
| 6449 |
} else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) { |
6449 |
} else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) { |
| 6450 |
// There is only a single DW_OP_llvm_arg at the start of the expression, |
6450 |
// There is only a single DW_OP_llvm_arg at the start of the expression, |
| 6451 |
// so it can be omitted along with DIArglist. |
6451 |
// so it can be omitted along with DIArglist. |
| 6452 |
assert(NewExpr[1] == 0 && |
6452 |
assert(NewExpr[1] == 0 && |
| 6453 |
"Lone LLVM_arg in a DIExpression should refer to location-op 0."); |
6453 |
"Lone LLVM_arg in a DIExpression should refer to location-op 0."); |
| 6454 |
llvm::SmallVector ShortenedOps(llvm::drop_begin(NewExpr, 2)); |
6454 |
llvm::SmallVector ShortenedOps(llvm::drop_begin(NewExpr, 2)); |
| 6455 |
updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps); |
6455 |
updateDVIWithLocation(*DVIRec.DVI, NewLocationOps[0], ShortenedOps); |
| 6456 |
} else { |
6456 |
} else { |
| 6457 |
// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary. |
6457 |
// Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary. |
| 6458 |
updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr); |
6458 |
updateDVIWithLocations(*DVIRec.DVI, NewLocationOps, NewExpr); |
| 6459 |
} |
6459 |
} |
| 6460 |
|
6460 |
|
| 6461 |
// If the DIExpression was previously empty then add the stack terminator. |
6461 |
// If the DIExpression was previously empty then add the stack terminator. |
| 6462 |
// Non-empty expressions have only had elements inserted into them and so the |
6462 |
// Non-empty expressions have only had elements inserted into them and so the |
| 6463 |
// terminator should already be present e.g. stack_value or fragment. |
6463 |
// terminator should already be present e.g. stack_value or fragment. |
| 6464 |
DIExpression *SalvageExpr = DVIRec.DVI->getExpression(); |
6464 |
DIExpression *SalvageExpr = DVIRec.DVI->getExpression(); |
| 6465 |
if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) { |
6465 |
if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) { |
| 6466 |
SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value}); |
6466 |
SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value}); |
| 6467 |
DVIRec.DVI->setExpression(SalvageExpr); |
6467 |
DVIRec.DVI->setExpression(SalvageExpr); |
| 6468 |
} |
6468 |
} |
| 6469 |
} |
6469 |
} |
| 6470 |
|
6470 |
|
| 6471 |
/// Cached location ops may be erased during LSR, in which case a poison is |
6471 |
/// Cached location ops may be erased during LSR, in which case a poison is |
| 6472 |
/// required when restoring from the cache. The type of that location is no |
6472 |
/// required when restoring from the cache. The type of that location is no |
| 6473 |
/// longer available, so just use int8. The poison will be replaced by one or |
6473 |
/// longer available, so just use int8. The poison will be replaced by one or |
| 6474 |
/// more locations later when a SCEVDbgValueBuilder selects alternative |
6474 |
/// more locations later when a SCEVDbgValueBuilder selects alternative |
| 6475 |
/// locations to use for the salvage. |
6475 |
/// locations to use for the salvage. |
| 6476 |
static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) { |
6476 |
static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) { |
| 6477 |
return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C)); |
6477 |
return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C)); |
| 6478 |
} |
6478 |
} |
| 6479 |
|
6479 |
|
| 6480 |
/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values. |
6480 |
/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values. |
| 6481 |
static void restorePreTransformState(DVIRecoveryRec &DVIRec) { |
6481 |
static void restorePreTransformState(DVIRecoveryRec &DVIRec) { |
| 6482 |
LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n" |
6482 |
LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n" |
| 6483 |
<< "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n'); |
6483 |
<< "scev-salvage: post-LSR: " << *DVIRec.DVI << '\n'); |
| 6484 |
assert(DVIRec.Expr && "Expected an expression"); |
6484 |
assert(DVIRec.Expr && "Expected an expression"); |
| 6485 |
DVIRec.DVI->setExpression(DVIRec.Expr); |
6485 |
DVIRec.DVI->setExpression(DVIRec.Expr); |
| 6486 |
|
6486 |
|
| 6487 |
// Even a single location-op may be inside a DIArgList and referenced with |
6487 |
// Even a single location-op may be inside a DIArgList and referenced with |
| 6488 |
// DW_OP_LLVM_arg, which is valid only with a DIArgList. |
6488 |
// DW_OP_LLVM_arg, which is valid only with a DIArgList. |
| 6489 |
if (!DVIRec.HadLocationArgList) { |
6489 |
if (!DVIRec.HadLocationArgList) { |
| 6490 |
assert(DVIRec.LocationOps.size() == 1 && |
6490 |
assert(DVIRec.LocationOps.size() == 1 && |
| 6491 |
"Unexpected number of location ops."); |
6491 |
"Unexpected number of location ops."); |
| 6492 |
// LSR's unsuccessful salvage attempt may have added DIArgList, which in |
6492 |
// LSR's unsuccessful salvage attempt may have added DIArgList, which in |
| 6493 |
// this case was not present before, so force the location back to a single |
6493 |
// this case was not present before, so force the location back to a single |
| 6494 |
// uncontained Value. |
6494 |
// uncontained Value. |
| 6495 |
Value *CachedValue = |
6495 |
Value *CachedValue = |
| 6496 |
getValueOrPoison(DVIRec.LocationOps[0], DVIRec.DVI->getContext()); |
6496 |
getValueOrPoison(DVIRec.LocationOps[0], DVIRec.DVI->getContext()); |
| 6497 |
DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue)); |
6497 |
DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue)); |
| 6498 |
} else { |
6498 |
} else { |
| 6499 |
SmallVector MetadataLocs; |
6499 |
SmallVector MetadataLocs; |
| 6500 |
for (WeakVH VH : DVIRec.LocationOps) { |
6500 |
for (WeakVH VH : DVIRec.LocationOps) { |
| 6501 |
Value *CachedValue = getValueOrPoison(VH, DVIRec.DVI->getContext()); |
6501 |
Value *CachedValue = getValueOrPoison(VH, DVIRec.DVI->getContext()); |
| 6502 |
MetadataLocs.push_back(ValueAsMetadata::get(CachedValue)); |
6502 |
MetadataLocs.push_back(ValueAsMetadata::get(CachedValue)); |
| 6503 |
} |
6503 |
} |
| 6504 |
auto ValArrayRef = llvm::ArrayRef(MetadataLocs); |
6504 |
auto ValArrayRef = llvm::ArrayRef(MetadataLocs); |
| 6505 |
DVIRec.DVI->setRawLocation( |
6505 |
DVIRec.DVI->setRawLocation( |
| 6506 |
llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef)); |
6506 |
llvm::DIArgList::get(DVIRec.DVI->getContext(), ValArrayRef)); |
| 6507 |
} |
6507 |
} |
| 6508 |
LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n'); |
6508 |
LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DVIRec.DVI << '\n'); |
| 6509 |
} |
6509 |
} |
| 6510 |
|
6510 |
|
| 6511 |
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, |
6511 |
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, |
| 6512 |
llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, |
6512 |
llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, |
| 6513 |
const SCEV *SCEVInductionVar, |
6513 |
const SCEV *SCEVInductionVar, |
| 6514 |
SCEVDbgValueBuilder IterCountExpr) { |
6514 |
SCEVDbgValueBuilder IterCountExpr) { |
| 6515 |
if (!DVIRec.DVI->isKillLocation()) |
6515 |
if (!DVIRec.DVI->isKillLocation()) |
| 6516 |
return false; |
6516 |
return false; |
| 6517 |
|
6517 |
|
| 6518 |
// LSR may have caused several changes to the dbg.value in the failed salvage |
6518 |
// LSR may have caused several changes to the dbg.value in the failed salvage |
| 6519 |
// attempt. So restore the DIExpression, the location ops and also the |
6519 |
// attempt. So restore the DIExpression, the location ops and also the |
| 6520 |
// location ops format, which is always DIArglist for multiple ops, but only |
6520 |
// location ops format, which is always DIArglist for multiple ops, but only |
| 6521 |
// sometimes for a single op. |
6521 |
// sometimes for a single op. |
| 6522 |
restorePreTransformState(DVIRec); |
6522 |
restorePreTransformState(DVIRec); |
| 6523 |
|
6523 |
|
| 6524 |
// LocationOpIndexMap[i] will store the post-LSR location index of |
6524 |
// LocationOpIndexMap[i] will store the post-LSR location index of |
| 6525 |
// the non-optimised out location at pre-LSR index i. |
6525 |
// the non-optimised out location at pre-LSR index i. |
| 6526 |
SmallVector LocationOpIndexMap; |
6526 |
SmallVector LocationOpIndexMap; |
| 6527 |
LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1); |
6527 |
LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1); |
| 6528 |
SmallVector NewLocationOps; |
6528 |
SmallVector NewLocationOps; |
| 6529 |
NewLocationOps.push_back(LSRInductionVar); |
6529 |
NewLocationOps.push_back(LSRInductionVar); |
| 6530 |
|
6530 |
|
| 6531 |
for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) { |
6531 |
for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) { |
| 6532 |
WeakVH VH = DVIRec.LocationOps[i]; |
6532 |
WeakVH VH = DVIRec.LocationOps[i]; |
| 6533 |
// Place the locations not optimised out in the list first, avoiding |
6533 |
// Place the locations not optimised out in the list first, avoiding |
| 6534 |
// inserts later. The map is used to update the DIExpression's |
6534 |
// inserts later. The map is used to update the DIExpression's |
| 6535 |
// DW_OP_LLVM_arg arguments as the expression is updated. |
6535 |
// DW_OP_LLVM_arg arguments as the expression is updated. |
| 6536 |
if (VH && !isa(VH)) { |
6536 |
if (VH && !isa(VH)) { |
| 6537 |
NewLocationOps.push_back(VH); |
6537 |
NewLocationOps.push_back(VH); |
| 6538 |
LocationOpIndexMap[i] = NewLocationOps.size() - 1; |
6538 |
LocationOpIndexMap[i] = NewLocationOps.size() - 1; |
| 6539 |
LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i |
6539 |
LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i |
| 6540 |
<< " now at index " << LocationOpIndexMap[i] << "\n"); |
6540 |
<< " now at index " << LocationOpIndexMap[i] << "\n"); |
| 6541 |
continue; |
6541 |
continue; |
| 6542 |
} |
6542 |
} |
| 6543 |
|
6543 |
|
| 6544 |
// It's possible that a value referred to in the SCEV may have been |
6544 |
// It's possible that a value referred to in the SCEV may have been |
| 6545 |
// optimised out by LSR. |
6545 |
// optimised out by LSR. |
| 6546 |
if (SE.containsErasedValue(DVIRec.SCEVs[i]) || |
6546 |
if (SE.containsErasedValue(DVIRec.SCEVs[i]) || |
| 6547 |
SE.containsUndefs(DVIRec.SCEVs[i])) { |
6547 |
SE.containsUndefs(DVIRec.SCEVs[i])) { |
| 6548 |
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i |
6548 |
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i |
| 6549 |
<< " refers to a location that is now undef or erased. " |
6549 |
<< " refers to a location that is now undef or erased. " |
| 6550 |
"Salvage abandoned.\n"); |
6550 |
"Salvage abandoned.\n"); |
| 6551 |
return false; |
6551 |
return false; |
| 6552 |
} |
6552 |
} |
| 6553 |
|
6553 |
|
| 6554 |
LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i |
6554 |
LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i |
| 6555 |
<< " with SCEV: " << *DVIRec.SCEVs[i] << "\n"); |
6555 |
<< " with SCEV: " << *DVIRec.SCEVs[i] << "\n"); |
| 6556 |
|
6556 |
|
| 6557 |
DVIRec.RecoveryExprs[i] = std::make_unique(); |
6557 |
DVIRec.RecoveryExprs[i] = std::make_unique(); |
| 6558 |
SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get(); |
6558 |
SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get(); |
| 6559 |
|
6559 |
|
| 6560 |
// Create an offset-based salvage expression if possible, as it requires |
6560 |
// Create an offset-based salvage expression if possible, as it requires |
| 6561 |
// less DWARF ops than an iteration count-based expression. |
6561 |
// less DWARF ops than an iteration count-based expression. |
| 6562 |
if (std::optional Offset = |
6562 |
if (std::optional Offset = |
| 6563 |
SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) { |
6563 |
SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) { |
| 6564 |
if (Offset->getSignificantBits() <= 64) |
6564 |
if (Offset->getSignificantBits() <= 64) |
| 6565 |
SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar); |
6565 |
SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar); |
| 6566 |
} else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr, |
6566 |
} else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr, |
| 6567 |
SE)) |
6567 |
SE)) |
| 6568 |
return false; |
6568 |
return false; |
| 6569 |
} |
6569 |
} |
| 6570 |
|
6570 |
|
| 6571 |
// Merge the DbgValueBuilder generated expressions and the original |
6571 |
// Merge the DbgValueBuilder generated expressions and the original |
| 6572 |
// DIExpression, place the result into an new vector. |
6572 |
// DIExpression, place the result into an new vector. |
| 6573 |
SmallVector NewExpr; |
6573 |
SmallVector NewExpr; |
| 6574 |
if (DVIRec.Expr->getNumElements() == 0) { |
6574 |
if (DVIRec.Expr->getNumElements() == 0) { |
| 6575 |
assert(DVIRec.RecoveryExprs.size() == 1 && |
6575 |
assert(DVIRec.RecoveryExprs.size() == 1 && |
| 6576 |
"Expected only a single recovery expression for an empty " |
6576 |
"Expected only a single recovery expression for an empty " |
| 6577 |
"DIExpression."); |
6577 |
"DIExpression."); |
| 6578 |
assert(DVIRec.RecoveryExprs[0] && |
6578 |
assert(DVIRec.RecoveryExprs[0] && |
| 6579 |
"Expected a SCEVDbgSalvageBuilder for location 0"); |
6579 |
"Expected a SCEVDbgSalvageBuilder for location 0"); |
| 6580 |
SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get(); |
6580 |
SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get(); |
| 6581 |
B->appendToVectors(NewExpr, NewLocationOps); |
6581 |
B->appendToVectors(NewExpr, NewLocationOps); |
| 6582 |
} |
6582 |
} |
| 6583 |
for (const auto &Op : DVIRec.Expr->expr_ops()) { |
6583 |
for (const auto &Op : DVIRec.Expr->expr_ops()) { |
| 6584 |
// Most Ops needn't be updated. |
6584 |
// Most Ops needn't be updated. |
| 6585 |
if (Op.getOp() != dwarf::DW_OP_LLVM_arg) { |
6585 |
if (Op.getOp() != dwarf::DW_OP_LLVM_arg) { |
| 6586 |
Op.appendToVector(NewExpr); |
6586 |
Op.appendToVector(NewExpr); |
| 6587 |
continue; |
6587 |
continue; |
| 6588 |
} |
6588 |
} |
| 6589 |
|
6589 |
|
| 6590 |
uint64_t LocationArgIndex = Op.getArg(0); |
6590 |
uint64_t LocationArgIndex = Op.getArg(0); |
| 6591 |
SCEVDbgValueBuilder *DbgBuilder = |
6591 |
SCEVDbgValueBuilder *DbgBuilder = |
| 6592 |
DVIRec.RecoveryExprs[LocationArgIndex].get(); |
6592 |
DVIRec.RecoveryExprs[LocationArgIndex].get(); |
| 6593 |
// The location doesn't have s SCEVDbgValueBuilder, so LSR did not |
6593 |
// The location doesn't have s SCEVDbgValueBuilder, so LSR did not |
| 6594 |
// optimise it away. So just translate the argument to the updated |
6594 |
// optimise it away. So just translate the argument to the updated |
| 6595 |
// location index. |
6595 |
// location index. |
| 6596 |
if (!DbgBuilder) { |
6596 |
if (!DbgBuilder) { |
| 6597 |
NewExpr.push_back(dwarf::DW_OP_LLVM_arg); |
6597 |
NewExpr.push_back(dwarf::DW_OP_LLVM_arg); |
| 6598 |
assert(LocationOpIndexMap[Op.getArg(0)] != -1 && |
6598 |
assert(LocationOpIndexMap[Op.getArg(0)] != -1 && |
| 6599 |
"Expected a positive index for the location-op position."); |
6599 |
"Expected a positive index for the location-op position."); |
| 6600 |
NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]); |
6600 |
NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]); |
| 6601 |
continue; |
6601 |
continue; |
| 6602 |
} |
6602 |
} |
| 6603 |
// The location has a recovery expression. |
6603 |
// The location has a recovery expression. |
| 6604 |
DbgBuilder->appendToVectors(NewExpr, NewLocationOps); |
6604 |
DbgBuilder->appendToVectors(NewExpr, NewLocationOps); |
| 6605 |
} |
6605 |
} |
| 6606 |
|
6606 |
|
| 6607 |
UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr); |
6607 |
UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr); |
| 6608 |
LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n"); |
6608 |
LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DVI << "\n"); |
| 6609 |
return true; |
6609 |
return true; |
| 6610 |
} |
6610 |
} |
| 6611 |
|
6611 |
|
| 6612 |
/// Obtain an expression for the iteration count, then attempt to salvage the |
6612 |
/// Obtain an expression for the iteration count, then attempt to salvage the |
| 6613 |
/// dbg.value intrinsics. |
6613 |
/// dbg.value intrinsics. |
| 6614 |
static void |
6614 |
static void |
| 6615 |
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, |
6615 |
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, |
| 6616 |
llvm::PHINode *LSRInductionVar, |
6616 |
llvm::PHINode *LSRInductionVar, |
| 6617 |
SmallVector, 2> &DVIToUpdate) { |
6617 |
SmallVector, 2> &DVIToUpdate) { |
| 6618 |
if (DVIToUpdate.empty()) |
6618 |
if (DVIToUpdate.empty()) |
| 6619 |
return; |
6619 |
return; |
| 6620 |
|
6620 |
|
| 6621 |
const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar); |
6621 |
const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar); |
| 6622 |
assert(SCEVInductionVar && |
6622 |
assert(SCEVInductionVar && |
| 6623 |
"Anticipated a SCEV for the post-LSR induction variable"); |
6623 |
"Anticipated a SCEV for the post-LSR induction variable"); |
| 6624 |
|
6624 |
|
| 6625 |
if (const SCEVAddRecExpr *IVAddRec = |
6625 |
if (const SCEVAddRecExpr *IVAddRec = |
| 6626 |
dyn_cast(SCEVInductionVar)) { |
6626 |
dyn_cast(SCEVInductionVar)) { |
| 6627 |
if (!IVAddRec->isAffine()) |
6627 |
if (!IVAddRec->isAffine()) |
| 6628 |
return; |
6628 |
return; |
| 6629 |
|
6629 |
|
| 6630 |
// Prevent translation using excessive resources. |
6630 |
// Prevent translation using excessive resources. |
| 6631 |
if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize) |
6631 |
if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize) |
| 6632 |
return; |
6632 |
return; |
| 6633 |
|
6633 |
|
| 6634 |
// The iteration count is required to recover location values. |
6634 |
// The iteration count is required to recover location values. |
| 6635 |
SCEVDbgValueBuilder IterCountExpr; |
6635 |
SCEVDbgValueBuilder IterCountExpr; |
| 6636 |
IterCountExpr.pushLocation(LSRInductionVar); |
6636 |
IterCountExpr.pushLocation(LSRInductionVar); |
| 6637 |
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE)) |
6637 |
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE)) |
| 6638 |
return; |
6638 |
return; |
| 6639 |
|
6639 |
|
| 6640 |
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar |
6640 |
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar |
| 6641 |
<< '\n'); |
6641 |
<< '\n'); |
| 6642 |
|
6642 |
|
| 6643 |
for (auto &DVIRec : DVIToUpdate) { |
6643 |
for (auto &DVIRec : DVIToUpdate) { |
| 6644 |
SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar, |
6644 |
SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar, |
| 6645 |
IterCountExpr); |
6645 |
IterCountExpr); |
| 6646 |
} |
6646 |
} |
| 6647 |
} |
6647 |
} |
| 6648 |
} |
6648 |
} |
| 6649 |
|
6649 |
|
| 6650 |
/// Identify and cache salvageable DVI locations and expressions along with the |
6650 |
/// Identify and cache salvageable DVI locations and expressions along with the |
| 6651 |
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between |
6651 |
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between |
| 6652 |
/// cacheing and salvaging. |
6652 |
/// cacheing and salvaging. |
| 6653 |
static void DbgGatherSalvagableDVI( |
6653 |
static void DbgGatherSalvagableDVI( |
| 6654 |
Loop *L, ScalarEvolution &SE, |
6654 |
Loop *L, ScalarEvolution &SE, |
| 6655 |
SmallVector, 2> &SalvageableDVISCEVs, |
6655 |
SmallVector, 2> &SalvageableDVISCEVs, |
| 6656 |
SmallSet, 2> &DVIHandles) { |
6656 |
SmallSet, 2> &DVIHandles) { |
| 6657 |
for (const auto &B : L->getBlocks()) { |
6657 |
for (const auto &B : L->getBlocks()) { |
| 6658 |
for (auto &I : *B) { |
6658 |
for (auto &I : *B) { |
| 6659 |
auto DVI = dyn_cast(&I); |
6659 |
auto DVI = dyn_cast(&I); |
| 6660 |
if (!DVI) |
6660 |
if (!DVI) |
| 6661 |
continue; |
6661 |
continue; |
| 6662 |
// Ensure that if any location op is undef that the dbg.vlue is not |
6662 |
// Ensure that if any location op is undef that the dbg.vlue is not |
| 6663 |
// cached. |
6663 |
// cached. |
| 6664 |
if (DVI->isKillLocation()) |
6664 |
if (DVI->isKillLocation()) |
| 6665 |
continue; |
6665 |
continue; |
| 6666 |
|
6666 |
|
| 6667 |
// Check that the location op SCEVs are suitable for translation to |
6667 |
// Check that the location op SCEVs are suitable for translation to |
| 6668 |
// DIExpression. |
6668 |
// DIExpression. |
| 6669 |
const auto &HasTranslatableLocationOps = |
6669 |
const auto &HasTranslatableLocationOps = |
| 6670 |
[&](const DbgValueInst *DVI) -> bool { |
6670 |
[&](const DbgValueInst *DVI) -> bool { |
| 6671 |
for (const auto LocOp : DVI->location_ops()) { |
6671 |
for (const auto LocOp : DVI->location_ops()) { |
| 6672 |
if (!LocOp) |
6672 |
if (!LocOp) |
| 6673 |
return false; |
6673 |
return false; |
| 6674 |
|
6674 |
|
| 6675 |
if (!SE.isSCEVable(LocOp->getType())) |
6675 |
if (!SE.isSCEVable(LocOp->getType())) |
| 6676 |
return false; |
6676 |
return false; |
| 6677 |
|
6677 |
|
| 6678 |
const SCEV *S = SE.getSCEV(LocOp); |
6678 |
const SCEV *S = SE.getSCEV(LocOp); |
| 6679 |
if (SE.containsUndefs(S)) |
6679 |
if (SE.containsUndefs(S)) |
| 6680 |
return false; |
6680 |
return false; |
| 6681 |
} |
6681 |
} |
| 6682 |
return true; |
6682 |
return true; |
| 6683 |
}; |
6683 |
}; |
| 6684 |
|
6684 |
|
| 6685 |
if (!HasTranslatableLocationOps(DVI)) |
6685 |
if (!HasTranslatableLocationOps(DVI)) |
| 6686 |
continue; |
6686 |
continue; |
| 6687 |
|
6687 |
|
| 6688 |
std::unique_ptr NewRec = |
6688 |
std::unique_ptr NewRec = |
| 6689 |
std::make_unique(DVI); |
6689 |
std::make_unique(DVI); |
| 6690 |
// Each location Op may need a SCEVDbgValueBuilder in order to recover it. |
6690 |
// Each location Op may need a SCEVDbgValueBuilder in order to recover it. |
| 6691 |
// Pre-allocating a vector will enable quick lookups of the builder later |
6691 |
// Pre-allocating a vector will enable quick lookups of the builder later |
| 6692 |
// during the salvage. |
6692 |
// during the salvage. |
| 6693 |
NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps()); |
6693 |
NewRec->RecoveryExprs.resize(DVI->getNumVariableLocationOps()); |
| 6694 |
for (const auto LocOp : DVI->location_ops()) { |
6694 |
for (const auto LocOp : DVI->location_ops()) { |
| 6695 |
NewRec->SCEVs.push_back(SE.getSCEV(LocOp)); |
6695 |
NewRec->SCEVs.push_back(SE.getSCEV(LocOp)); |
| 6696 |
NewRec->LocationOps.push_back(LocOp); |
6696 |
NewRec->LocationOps.push_back(LocOp); |
| 6697 |
NewRec->HadLocationArgList = DVI->hasArgList(); |
6697 |
NewRec->HadLocationArgList = DVI->hasArgList(); |
| 6698 |
} |
6698 |
} |
| 6699 |
SalvageableDVISCEVs.push_back(std::move(NewRec)); |
6699 |
SalvageableDVISCEVs.push_back(std::move(NewRec)); |
| 6700 |
DVIHandles.insert(DVI); |
6700 |
DVIHandles.insert(DVI); |
| 6701 |
} |
6701 |
} |
| 6702 |
} |
6702 |
} |
| 6703 |
} |
6703 |
} |
| 6704 |
|
6704 |
|
| 6705 |
/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback |
6705 |
/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback |
| 6706 |
/// any PHi from the loop header is usable, but may have less chance of |
6706 |
/// any PHi from the loop header is usable, but may have less chance of |
| 6707 |
/// surviving subsequent transforms. |
6707 |
/// surviving subsequent transforms. |
| 6708 |
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, |
6708 |
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, |
| 6709 |
const LSRInstance &LSR) { |
6709 |
const LSRInstance &LSR) { |
| 6710 |
|
6710 |
|
| 6711 |
auto IsSuitableIV = [&](PHINode *P) { |
6711 |
auto IsSuitableIV = [&](PHINode *P) { |
| 6712 |
if (!SE.isSCEVable(P->getType())) |
6712 |
if (!SE.isSCEVable(P->getType())) |
| 6713 |
return false; |
6713 |
return false; |
| 6714 |
if (const SCEVAddRecExpr *Rec = dyn_cast(SE.getSCEV(P))) |
6714 |
if (const SCEVAddRecExpr *Rec = dyn_cast(SE.getSCEV(P))) |
| 6715 |
return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P)); |
6715 |
return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P)); |
| 6716 |
return false; |
6716 |
return false; |
| 6717 |
}; |
6717 |
}; |
| 6718 |
|
6718 |
|
| 6719 |
// For now, just pick the first IV that was generated and inserted by |
6719 |
// For now, just pick the first IV that was generated and inserted by |
| 6720 |
// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away |
6720 |
// ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away |
| 6721 |
// by subsequent transforms. |
6721 |
// by subsequent transforms. |
| 6722 |
for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) { |
6722 |
for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) { |
| 6723 |
if (!IV) |
6723 |
if (!IV) |
| 6724 |
continue; |
6724 |
continue; |
| 6725 |
|
6725 |
|
| 6726 |
// There should only be PHI node IVs. |
6726 |
// There should only be PHI node IVs. |
| 6727 |
PHINode *P = cast(&*IV); |
6727 |
PHINode *P = cast(&*IV); |
| 6728 |
|
6728 |
|
| 6729 |
if (IsSuitableIV(P)) |
6729 |
if (IsSuitableIV(P)) |
| 6730 |
return P; |
6730 |
return P; |
| 6731 |
} |
6731 |
} |
| 6732 |
|
6732 |
|
| 6733 |
for (PHINode &P : L.getHeader()->phis()) { |
6733 |
for (PHINode &P : L.getHeader()->phis()) { |
| 6734 |
if (IsSuitableIV(&P)) |
6734 |
if (IsSuitableIV(&P)) |
| 6735 |
return &P; |
6735 |
return &P; |
| 6736 |
} |
6736 |
} |
| 6737 |
return nullptr; |
6737 |
return nullptr; |
| 6738 |
} |
6738 |
} |
| 6739 |
|
6739 |
|
| 6740 |
static std::optional> |
6740 |
static std::optional> |
| 6741 |
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, |
6741 |
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, |
| 6742 |
const LoopInfo &LI) { |
6742 |
const LoopInfo &LI) { |
| 6743 |
if (!L->isInnermost()) { |
6743 |
if (!L->isInnermost()) { |
| 6744 |
LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n"); |
6744 |
LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n"); |
| 6745 |
return std::nullopt; |
6745 |
return std::nullopt; |
| 6746 |
} |
6746 |
} |
| 6747 |
// Only inspect on simple loop structure |
6747 |
// Only inspect on simple loop structure |
| 6748 |
if (!L->isLoopSimplifyForm()) { |
6748 |
if (!L->isLoopSimplifyForm()) { |
| 6749 |
LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n"); |
6749 |
LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n"); |
| 6750 |
return std::nullopt; |
6750 |
return std::nullopt; |
| 6751 |
} |
6751 |
} |
| 6752 |
|
6752 |
|
| 6753 |
if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { |
6753 |
if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { |
| 6754 |
LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n"); |
6754 |
LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n"); |
| 6755 |
return std::nullopt; |
6755 |
return std::nullopt; |
| 6756 |
} |
6756 |
} |
| 6757 |
|
6757 |
|
| 6758 |
BasicBlock *LoopLatch = L->getLoopLatch(); |
6758 |
BasicBlock *LoopLatch = L->getLoopLatch(); |
| 6759 |
BranchInst *BI = dyn_cast(LoopLatch->getTerminator()); |
6759 |
BranchInst *BI = dyn_cast(LoopLatch->getTerminator()); |
| 6760 |
if (!BI || BI->isUnconditional()) |
6760 |
if (!BI || BI->isUnconditional()) |
| 6761 |
return std::nullopt; |
6761 |
return std::nullopt; |
| 6762 |
auto *TermCond = dyn_cast(BI->getCondition()); |
6762 |
auto *TermCond = dyn_cast(BI->getCondition()); |
| 6763 |
if (!TermCond) { |
6763 |
if (!TermCond) { |
| 6764 |
LLVM_DEBUG( |
6764 |
LLVM_DEBUG( |
| 6765 |
dbgs() << "Cannot fold on branching condition that is not an ICmpInst"); |
6765 |
dbgs() << "Cannot fold on branching condition that is not an ICmpInst"); |
| 6766 |
return std::nullopt; |
6766 |
return std::nullopt; |
| 6767 |
} |
6767 |
} |
| 6768 |
if (!TermCond->hasOneUse()) { |
6768 |
if (!TermCond->hasOneUse()) { |
| 6769 |
LLVM_DEBUG( |
6769 |
LLVM_DEBUG( |
| 6770 |
dbgs() |
6770 |
dbgs() |
| 6771 |
<< "Cannot replace terminating condition with more than one use\n"); |
6771 |
<< "Cannot replace terminating condition with more than one use\n"); |
| 6772 |
return std::nullopt; |
6772 |
return std::nullopt; |
| 6773 |
} |
6773 |
} |
| 6774 |
|
6774 |
|
| 6775 |
BinaryOperator *LHS = dyn_cast(TermCond->getOperand(0)); |
6775 |
BinaryOperator *LHS = dyn_cast(TermCond->getOperand(0)); |
| 6776 |
Value *RHS = TermCond->getOperand(1); |
6776 |
Value *RHS = TermCond->getOperand(1); |
| 6777 |
if (!LHS || !L->isLoopInvariant(RHS)) |
6777 |
if (!LHS || !L->isLoopInvariant(RHS)) |
| 6778 |
// We could pattern match the inverse form of the icmp, but that is |
6778 |
// We could pattern match the inverse form of the icmp, but that is |
| 6779 |
// non-canonical, and this pass is running *very* late in the pipeline. |
6779 |
// non-canonical, and this pass is running *very* late in the pipeline. |
| 6780 |
return std::nullopt; |
6780 |
return std::nullopt; |
| 6781 |
|
6781 |
|
| 6782 |
// Find the IV used by the current exit condition. |
6782 |
// Find the IV used by the current exit condition. |
| 6783 |
PHINode *ToFold; |
6783 |
PHINode *ToFold; |
| 6784 |
Value *ToFoldStart, *ToFoldStep; |
6784 |
Value *ToFoldStart, *ToFoldStep; |
| 6785 |
if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep)) |
6785 |
if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep)) |
| 6786 |
return std::nullopt; |
6786 |
return std::nullopt; |
| 6787 |
|
6787 |
|
| 6788 |
// If that IV isn't dead after we rewrite the exit condition in terms of |
6788 |
// If that IV isn't dead after we rewrite the exit condition in terms of |
| 6789 |
// another IV, there's no point in doing the transform. |
6789 |
// another IV, there's no point in doing the transform. |
| 6790 |
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond)) |
6790 |
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond)) |
| 6791 |
return std::nullopt; |
6791 |
return std::nullopt; |
| 6792 |
|
6792 |
|
| 6793 |
const SCEV *BECount = SE.getBackedgeTakenCount(L); |
6793 |
const SCEV *BECount = SE.getBackedgeTakenCount(L); |
| 6794 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
6794 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
| 6795 |
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); |
6795 |
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); |
| 6796 |
|
6796 |
|
| 6797 |
PHINode *ToHelpFold = nullptr; |
6797 |
PHINode *ToHelpFold = nullptr; |
| 6798 |
const SCEV *TermValueS = nullptr; |
6798 |
const SCEV *TermValueS = nullptr; |
| 6799 |
bool MustDropPoison = false; |
6799 |
bool MustDropPoison = false; |
| 6800 |
for (PHINode &PN : L->getHeader()->phis()) { |
6800 |
for (PHINode &PN : L->getHeader()->phis()) { |
| 6801 |
if (ToFold == &PN) |
6801 |
if (ToFold == &PN) |
| 6802 |
continue; |
6802 |
continue; |
| 6803 |
|
6803 |
|
| 6804 |
if (!SE.isSCEVable(PN.getType())) { |
6804 |
if (!SE.isSCEVable(PN.getType())) { |
| 6805 |
LLVM_DEBUG(dbgs() << "IV of phi '" << PN |
6805 |
LLVM_DEBUG(dbgs() << "IV of phi '" << PN |
| 6806 |
<< "' is not SCEV-able, not qualified for the " |
6806 |
<< "' is not SCEV-able, not qualified for the " |
| 6807 |
"terminating condition folding.\n"); |
6807 |
"terminating condition folding.\n"); |
| 6808 |
continue; |
6808 |
continue; |
| 6809 |
} |
6809 |
} |
| 6810 |
const SCEVAddRecExpr *AddRec = dyn_cast(SE.getSCEV(&PN)); |
6810 |
const SCEVAddRecExpr *AddRec = dyn_cast(SE.getSCEV(&PN)); |
| 6811 |
// Only speculate on affine AddRec |
6811 |
// Only speculate on affine AddRec |
| 6812 |
if (!AddRec || !AddRec->isAffine()) { |
6812 |
if (!AddRec || !AddRec->isAffine()) { |
| 6813 |
LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN |
6813 |
LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN |
| 6814 |
<< "' is not an affine add recursion, not qualified " |
6814 |
<< "' is not an affine add recursion, not qualified " |
| 6815 |
"for the terminating condition folding.\n"); |
6815 |
"for the terminating condition folding.\n"); |
| 6816 |
continue; |
6816 |
continue; |
| 6817 |
} |
6817 |
} |
| 6818 |
|
6818 |
|
| 6819 |
// Check that we can compute the value of AddRec on the exiting iteration |
6819 |
// Check that we can compute the value of AddRec on the exiting iteration |
| 6820 |
// without soundness problems. evaluateAtIteration internally needs |
6820 |
// without soundness problems. evaluateAtIteration internally needs |
| 6821 |
// to multiply the stride of the iteration number - which may wrap around. |
6821 |
// to multiply the stride of the iteration number - which may wrap around. |
| 6822 |
// The issue here is subtle because computing the result accounting for |
6822 |
// The issue here is subtle because computing the result accounting for |
| 6823 |
// wrap is insufficient. In order to use the result in an exit test, we |
6823 |
// wrap is insufficient. In order to use the result in an exit test, we |
| 6824 |
// must also know that AddRec doesn't take the same value on any previous |
6824 |
// must also know that AddRec doesn't take the same value on any previous |
| 6825 |
// iteration. The simplest case to consider is a candidate IV which is |
6825 |
// iteration. The simplest case to consider is a candidate IV which is |
| 6826 |
// narrower than the trip count (and thus original IV), but this can |
6826 |
// narrower than the trip count (and thus original IV), but this can |
| 6827 |
// also happen due to non-unit strides on the candidate IVs. |
6827 |
// also happen due to non-unit strides on the candidate IVs. |
| 6828 |
if (!AddRec->hasNoSelfWrap()) |
6828 |
if (!AddRec->hasNoSelfWrap()) |
| 6829 |
continue; |
6829 |
continue; |
| 6830 |
|
6830 |
|
| 6831 |
const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE); |
6831 |
const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE); |
| 6832 |
const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE); |
6832 |
const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE); |
| 6833 |
if (!Expander.isSafeToExpand(TermValueSLocal)) { |
6833 |
if (!Expander.isSafeToExpand(TermValueSLocal)) { |
| 6834 |
LLVM_DEBUG( |
6834 |
LLVM_DEBUG( |
| 6835 |
dbgs() << "Is not safe to expand terminating value for phi node" << PN |
6835 |
dbgs() << "Is not safe to expand terminating value for phi node" << PN |
| 6836 |
<< "\n"); |
6836 |
<< "\n"); |
| 6837 |
continue; |
6837 |
continue; |
| 6838 |
} |
6838 |
} |
| 6839 |
|
6839 |
|
| 6840 |
// The candidate IV may have been otherwise dead and poison from the |
6840 |
// The candidate IV may have been otherwise dead and poison from the |
| 6841 |
// very first iteration. If we can't disprove that, we can't use the IV. |
6841 |
// very first iteration. If we can't disprove that, we can't use the IV. |
| 6842 |
if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) { |
6842 |
if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) { |
| 6843 |
LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV " |
6843 |
LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV " |
| 6844 |
<< PN << "\n"); |
6844 |
<< PN << "\n"); |
| 6845 |
continue; |
6845 |
continue; |
| 6846 |
} |
6846 |
} |
| 6847 |
|
6847 |
|
| 6848 |
// The candidate IV may become poison on the last iteration. If this |
6848 |
// The candidate IV may become poison on the last iteration. If this |
| 6849 |
// value is not branched on, this is a well defined program. We're |
6849 |
// value is not branched on, this is a well defined program. We're |
| 6850 |
// about to add a new use to this IV, and we have to ensure we don't |
6850 |
// about to add a new use to this IV, and we have to ensure we don't |
| 6851 |
// insert UB which didn't previously exist. |
6851 |
// insert UB which didn't previously exist. |
| 6852 |
bool MustDropPoisonLocal = false; |
6852 |
bool MustDropPoisonLocal = false; |
| 6853 |
Instruction *PostIncV = |
6853 |
Instruction *PostIncV = |
| 6854 |
cast(PN.getIncomingValueForBlock(LoopLatch)); |
6854 |
cast(PN.getIncomingValueForBlock(LoopLatch)); |
| 6855 |
if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(), |
6855 |
if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(), |
| 6856 |
&DT)) { |
6856 |
&DT)) { |
| 6857 |
LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use" |
6857 |
LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use" |
| 6858 |
<< PN << "\n"); |
6858 |
<< PN << "\n"); |
| 6859 |
|
6859 |
|
| 6860 |
// If this is a complex recurrance with multiple instructions computing |
6860 |
// If this is a complex recurrance with multiple instructions computing |
| 6861 |
// the backedge value, we might need to strip poison flags from all of |
6861 |
// the backedge value, we might need to strip poison flags from all of |
| 6862 |
// them. |
6862 |
// them. |
| 6863 |
if (PostIncV->getOperand(0) != &PN) |
6863 |
if (PostIncV->getOperand(0) != &PN) |
| 6864 |
continue; |
6864 |
continue; |
| 6865 |
|
6865 |
|
| 6866 |
// In order to perform the transform, we need to drop the poison generating |
6866 |
// In order to perform the transform, we need to drop the poison generating |
| 6867 |
// flags on this instruction (if any). |
6867 |
// flags on this instruction (if any). |
| 6868 |
MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags(); |
6868 |
MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags(); |
| 6869 |
} |
6869 |
} |
| 6870 |
|
6870 |
|
| 6871 |
// We pick the last legal alternate IV. We could expore choosing an optimal |
6871 |
// We pick the last legal alternate IV. We could expore choosing an optimal |
| 6872 |
// alternate IV if we had a decent heuristic to do so. |
6872 |
// alternate IV if we had a decent heuristic to do so. |
| 6873 |
ToHelpFold = &PN; |
6873 |
ToHelpFold = &PN; |
| 6874 |
TermValueS = TermValueSLocal; |
6874 |
TermValueS = TermValueSLocal; |
| 6875 |
MustDropPoison = MustDropPoisonLocal; |
6875 |
MustDropPoison = MustDropPoisonLocal; |
| 6876 |
} |
6876 |
} |
| 6877 |
|
6877 |
|
| 6878 |
LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs() |
6878 |
LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs() |
| 6879 |
<< "Cannot find other AddRec IV to help folding\n";); |
6879 |
<< "Cannot find other AddRec IV to help folding\n";); |
| 6880 |
|
6880 |
|
| 6881 |
LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs() |
6881 |
LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs() |
| 6882 |
<< "\nFound loop that can fold terminating condition\n" |
6882 |
<< "\nFound loop that can fold terminating condition\n" |
| 6883 |
<< " BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n" |
6883 |
<< " BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n" |
| 6884 |
<< " TermCond: " << *TermCond << "\n" |
6884 |
<< " TermCond: " << *TermCond << "\n" |
| 6885 |
<< " BrandInst: " << *BI << "\n" |
6885 |
<< " BrandInst: " << *BI << "\n" |
| 6886 |
<< " ToFold: " << *ToFold << "\n" |
6886 |
<< " ToFold: " << *ToFold << "\n" |
| 6887 |
<< " ToHelpFold: " << *ToHelpFold << "\n"); |
6887 |
<< " ToHelpFold: " << *ToHelpFold << "\n"); |
| 6888 |
|
6888 |
|
| 6889 |
if (!ToFold || !ToHelpFold) |
6889 |
if (!ToFold || !ToHelpFold) |
| 6890 |
return std::nullopt; |
6890 |
return std::nullopt; |
| 6891 |
return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison); |
6891 |
return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison); |
| 6892 |
} |
6892 |
} |
| 6893 |
|
6893 |
|
| 6894 |
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, |
6894 |
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, |
| 6895 |
DominatorTree &DT, LoopInfo &LI, |
6895 |
DominatorTree &DT, LoopInfo &LI, |
| 6896 |
const TargetTransformInfo &TTI, |
6896 |
const TargetTransformInfo &TTI, |
| 6897 |
AssumptionCache &AC, TargetLibraryInfo &TLI, |
6897 |
AssumptionCache &AC, TargetLibraryInfo &TLI, |
| 6898 |
MemorySSA *MSSA) { |
6898 |
MemorySSA *MSSA) { |
| 6899 |
|
6899 |
|
| 6900 |
// Debug preservation - before we start removing anything identify which DVI |
6900 |
// Debug preservation - before we start removing anything identify which DVI |
| 6901 |
// meet the salvageable criteria and store their DIExpression and SCEVs. |
6901 |
// meet the salvageable criteria and store their DIExpression and SCEVs. |
| 6902 |
SmallVector, 2> SalvageableDVIRecords; |
6902 |
SmallVector, 2> SalvageableDVIRecords; |
| 6903 |
SmallSet, 2> DVIHandles; |
6903 |
SmallSet, 2> DVIHandles; |
| 6904 |
DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles); |
6904 |
DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles); |
| 6905 |
|
6905 |
|
| 6906 |
bool Changed = false; |
6906 |
bool Changed = false; |
| 6907 |
std::unique_ptr MSSAU; |
6907 |
std::unique_ptr MSSAU; |
| 6908 |
if (MSSA) |
6908 |
if (MSSA) |
| 6909 |
MSSAU = std::make_unique(MSSA); |
6909 |
MSSAU = std::make_unique(MSSA); |
| 6910 |
|
6910 |
|
| 6911 |
// Run the main LSR transformation. |
6911 |
// Run the main LSR transformation. |
| 6912 |
const LSRInstance &Reducer = |
6912 |
const LSRInstance &Reducer = |
| 6913 |
LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()); |
6913 |
LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()); |
| 6914 |
Changed |= Reducer.getChanged(); |
6914 |
Changed |= Reducer.getChanged(); |
| 6915 |
|
6915 |
|
| 6916 |
// Remove any extra phis created by processing inner loops. |
6916 |
// Remove any extra phis created by processing inner loops. |
| 6917 |
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
6917 |
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
| 6918 |
if (EnablePhiElim && L->isLoopSimplifyForm()) { |
6918 |
if (EnablePhiElim && L->isLoopSimplifyForm()) { |
| 6919 |
SmallVector DeadInsts; |
6919 |
SmallVector DeadInsts; |
| 6920 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
6920 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
| 6921 |
SCEVExpander Rewriter(SE, DL, "lsr", false); |
6921 |
SCEVExpander Rewriter(SE, DL, "lsr", false); |
| 6922 |
#ifndef NDEBUG |
6922 |
#ifndef NDEBUG |
| 6923 |
Rewriter.setDebugType(DEBUG_TYPE); |
6923 |
Rewriter.setDebugType(DEBUG_TYPE); |
| 6924 |
#endif |
6924 |
#endif |
| 6925 |
unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI); |
6925 |
unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI); |
| 6926 |
if (numFolded) { |
6926 |
if (numFolded) { |
| 6927 |
Changed = true; |
6927 |
Changed = true; |
| 6928 |
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, |
6928 |
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, |
| 6929 |
MSSAU.get()); |
6929 |
MSSAU.get()); |
| 6930 |
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
6930 |
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
| 6931 |
} |
6931 |
} |
| 6932 |
} |
6932 |
} |
| 6933 |
// LSR may at times remove all uses of an induction variable from a loop. |
6933 |
// LSR may at times remove all uses of an induction variable from a loop. |
| 6934 |
// The only remaining use is the PHI in the exit block. |
6934 |
// The only remaining use is the PHI in the exit block. |
| 6935 |
// When this is the case, if the exit value of the IV can be calculated using |
6935 |
// When this is the case, if the exit value of the IV can be calculated using |
| 6936 |
// SCEV, we can replace the exit block PHI with the final value of the IV and |
6936 |
// SCEV, we can replace the exit block PHI with the final value of the IV and |
| 6937 |
// skip the updates in each loop iteration. |
6937 |
// skip the updates in each loop iteration. |
| 6938 |
if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) { |
6938 |
if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) { |
| 6939 |
SmallVector DeadInsts; |
6939 |
SmallVector DeadInsts; |
| 6940 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
6940 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
| 6941 |
SCEVExpander Rewriter(SE, DL, "lsr", true); |
6941 |
SCEVExpander Rewriter(SE, DL, "lsr", true); |
| 6942 |
int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT, |
6942 |
int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT, |
| 6943 |
UnusedIndVarInLoop, DeadInsts); |
6943 |
UnusedIndVarInLoop, DeadInsts); |
| 6944 |
if (Rewrites) { |
6944 |
if (Rewrites) { |
| 6945 |
Changed = true; |
6945 |
Changed = true; |
| 6946 |
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, |
6946 |
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, |
| 6947 |
MSSAU.get()); |
6947 |
MSSAU.get()); |
| 6948 |
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
6948 |
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
| 6949 |
} |
6949 |
} |
| 6950 |
} |
6950 |
} |
| 6951 |
|
6951 |
|
| 6952 |
if (AllowTerminatingConditionFoldingAfterLSR) { |
6952 |
if (AllowTerminatingConditionFoldingAfterLSR) { |
| 6953 |
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) { |
6953 |
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) { |
| 6954 |
auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt; |
6954 |
auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt; |
| 6955 |
|
6955 |
|
| 6956 |
Changed = true; |
6956 |
Changed = true; |
| 6957 |
NumTermFold++; |
6957 |
NumTermFold++; |
| 6958 |
|
6958 |
|
| 6959 |
BasicBlock *LoopPreheader = L->getLoopPreheader(); |
6959 |
BasicBlock *LoopPreheader = L->getLoopPreheader(); |
| 6960 |
BasicBlock *LoopLatch = L->getLoopLatch(); |
6960 |
BasicBlock *LoopLatch = L->getLoopLatch(); |
| 6961 |
|
6961 |
|
| 6962 |
(void)ToFold; |
6962 |
(void)ToFold; |
| 6963 |
LLVM_DEBUG(dbgs() << "To fold phi-node:\n" |
6963 |
LLVM_DEBUG(dbgs() << "To fold phi-node:\n" |
| 6964 |
<< *ToFold << "\n" |
6964 |
<< *ToFold << "\n" |
| 6965 |
<< "New term-cond phi-node:\n" |
6965 |
<< "New term-cond phi-node:\n" |
| 6966 |
<< *ToHelpFold << "\n"); |
6966 |
<< *ToHelpFold << "\n"); |
| 6967 |
|
6967 |
|
| 6968 |
Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader); |
6968 |
Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader); |
| 6969 |
(void)StartValue; |
6969 |
(void)StartValue; |
| 6970 |
Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch); |
6970 |
Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch); |
| 6971 |
|
6971 |
|
| 6972 |
// See comment in canFoldTermCondOfLoop on why this is sufficient. |
6972 |
// See comment in canFoldTermCondOfLoop on why this is sufficient. |
| 6973 |
if (MustDrop) |
6973 |
if (MustDrop) |
| 6974 |
cast(LoopValue)->dropPoisonGeneratingFlags(); |
6974 |
cast(LoopValue)->dropPoisonGeneratingFlags(); |
| 6975 |
|
6975 |
|
| 6976 |
// SCEVExpander for both use in preheader and latch |
6976 |
// SCEVExpander for both use in preheader and latch |
| 6977 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
6977 |
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); |
| 6978 |
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); |
6978 |
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); |
| 6979 |
SCEVExpanderCleaner ExpCleaner(Expander); |
6979 |
SCEVExpanderCleaner ExpCleaner(Expander); |
| 6980 |
|
6980 |
|
| 6981 |
assert(Expander.isSafeToExpand(TermValueS) && |
6981 |
assert(Expander.isSafeToExpand(TermValueS) && |
| 6982 |
"Terminating value was checked safe in canFoldTerminatingCondition"); |
6982 |
"Terminating value was checked safe in canFoldTerminatingCondition"); |
| 6983 |
|
6983 |
|
| 6984 |
// Create new terminating value at loop header |
6984 |
// Create new terminating value at loop header |
| 6985 |
Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(), |
6985 |
Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(), |
| 6986 |
LoopPreheader->getTerminator()); |
6986 |
LoopPreheader->getTerminator()); |
| 6987 |
|
6987 |
|
| 6988 |
LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n" |
6988 |
LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n" |
| 6989 |
<< *StartValue << "\n" |
6989 |
<< *StartValue << "\n" |
| 6990 |
<< "Terminating value of new term-cond phi-node:\n" |
6990 |
<< "Terminating value of new term-cond phi-node:\n" |
| 6991 |
<< *TermValue << "\n"); |
6991 |
<< *TermValue << "\n"); |
| 6992 |
|
6992 |
|
| 6993 |
// Create new terminating condition at loop latch |
6993 |
// Create new terminating condition at loop latch |
| 6994 |
BranchInst *BI = cast(LoopLatch->getTerminator()); |
6994 |
BranchInst *BI = cast(LoopLatch->getTerminator()); |
| 6995 |
ICmpInst *OldTermCond = cast(BI->getCondition()); |
6995 |
ICmpInst *OldTermCond = cast(BI->getCondition()); |
| 6996 |
IRBuilder<> LatchBuilder(LoopLatch->getTerminator()); |
6996 |
IRBuilder<> LatchBuilder(LoopLatch->getTerminator()); |
| 6997 |
Value *NewTermCond = |
6997 |
Value *NewTermCond = |
| 6998 |
LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue, |
6998 |
LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue, |
| 6999 |
"lsr_fold_term_cond.replaced_term_cond"); |
6999 |
"lsr_fold_term_cond.replaced_term_cond"); |
| 7000 |
// Swap successors to exit loop body if IV equals to new TermValue |
7000 |
// Swap successors to exit loop body if IV equals to new TermValue |
| 7001 |
if (BI->getSuccessor(0) == L->getHeader()) |
7001 |
if (BI->getSuccessor(0) == L->getHeader()) |
| 7002 |
BI->swapSuccessors(); |
7002 |
BI->swapSuccessors(); |
| 7003 |
|
7003 |
|
| 7004 |
LLVM_DEBUG(dbgs() << "Old term-cond:\n" |
7004 |
LLVM_DEBUG(dbgs() << "Old term-cond:\n" |
| 7005 |
<< *OldTermCond << "\n" |
7005 |
<< *OldTermCond << "\n" |
| 7006 |
<< "New term-cond:\b" << *NewTermCond << "\n"); |
7006 |
<< "New term-cond:\b" << *NewTermCond << "\n"); |
| 7007 |
|
7007 |
|
| 7008 |
BI->setCondition(NewTermCond); |
7008 |
BI->setCondition(NewTermCond); |
| 7009 |
|
7009 |
|
| 7010 |
OldTermCond->eraseFromParent(); |
7010 |
OldTermCond->eraseFromParent(); |
| 7011 |
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
7011 |
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); |
| 7012 |
|
7012 |
|
| 7013 |
ExpCleaner.markResultUsed(); |
7013 |
ExpCleaner.markResultUsed(); |
| 7014 |
} |
7014 |
} |
| 7015 |
} |
7015 |
} |
| 7016 |
|
7016 |
|
| 7017 |
if (SalvageableDVIRecords.empty()) |
7017 |
if (SalvageableDVIRecords.empty()) |
| 7018 |
return Changed; |
7018 |
return Changed; |
| 7019 |
|
7019 |
|
| 7020 |
// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with |
7020 |
// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with |
| 7021 |
// expressions composed using the derived iteration count. |
7021 |
// expressions composed using the derived iteration count. |
| 7022 |
// TODO: Allow for multiple IV references for nested AddRecSCEVs |
7022 |
// TODO: Allow for multiple IV references for nested AddRecSCEVs |
| 7023 |
for (const auto &L : LI) { |
7023 |
for (const auto &L : LI) { |
| 7024 |
if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer)) |
7024 |
if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer)) |
| 7025 |
DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords); |
7025 |
DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords); |
| 7026 |
else { |
7026 |
else { |
| 7027 |
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV " |
7027 |
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV " |
| 7028 |
"could not be identified.\n"); |
7028 |
"could not be identified.\n"); |
| 7029 |
} |
7029 |
} |
| 7030 |
} |
7030 |
} |
| 7031 |
|
7031 |
|
| 7032 |
for (auto &Rec : SalvageableDVIRecords) |
7032 |
for (auto &Rec : SalvageableDVIRecords) |
| 7033 |
Rec->clear(); |
7033 |
Rec->clear(); |
| 7034 |
SalvageableDVIRecords.clear(); |
7034 |
SalvageableDVIRecords.clear(); |
| 7035 |
DVIHandles.clear(); |
7035 |
DVIHandles.clear(); |
| 7036 |
return Changed; |
7036 |
return Changed; |
| 7037 |
} |
7037 |
} |
| 7038 |
|
7038 |
|
| 7039 |
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { |
7039 |
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { |
| 7040 |
if (skipLoop(L)) |
7040 |
if (skipLoop(L)) |
| 7041 |
return false; |
7041 |
return false; |
| 7042 |
|
7042 |
|
| 7043 |
auto &IU = getAnalysis().getIU(); |
7043 |
auto &IU = getAnalysis().getIU(); |
| 7044 |
auto &SE = getAnalysis().getSE(); |
7044 |
auto &SE = getAnalysis().getSE(); |
| 7045 |
auto &DT = getAnalysis().getDomTree(); |
7045 |
auto &DT = getAnalysis().getDomTree(); |
| 7046 |
auto &LI = getAnalysis().getLoopInfo(); |
7046 |
auto &LI = getAnalysis().getLoopInfo(); |
| 7047 |
const auto &TTI = getAnalysis().getTTI( |
7047 |
const auto &TTI = getAnalysis().getTTI( |
| 7048 |
*L->getHeader()->getParent()); |
7048 |
*L->getHeader()->getParent()); |
| 7049 |
auto &AC = getAnalysis().getAssumptionCache( |
7049 |
auto &AC = getAnalysis().getAssumptionCache( |
| 7050 |
*L->getHeader()->getParent()); |
7050 |
*L->getHeader()->getParent()); |
| 7051 |
auto &TLI = getAnalysis().getTLI( |
7051 |
auto &TLI = getAnalysis().getTLI( |
| 7052 |
*L->getHeader()->getParent()); |
7052 |
*L->getHeader()->getParent()); |
| 7053 |
auto *MSSAAnalysis = getAnalysisIfAvailable(); |
7053 |
auto *MSSAAnalysis = getAnalysisIfAvailable(); |
| 7054 |
MemorySSA *MSSA = nullptr; |
7054 |
MemorySSA *MSSA = nullptr; |
| 7055 |
if (MSSAAnalysis) |
7055 |
if (MSSAAnalysis) |
| 7056 |
MSSA = &MSSAAnalysis->getMSSA(); |
7056 |
MSSA = &MSSAAnalysis->getMSSA(); |
| 7057 |
return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA); |
7057 |
return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA); |
| 7058 |
} |
7058 |
} |
| 7059 |
|
7059 |
|
| 7060 |
PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, |
7060 |
PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, |
| 7061 |
LoopStandardAnalysisResults &AR, |
7061 |
LoopStandardAnalysisResults &AR, |
| 7062 |
LPMUpdater &) { |
7062 |
LPMUpdater &) { |
| 7063 |
if (!ReduceLoopStrength(&L, AM.getResult(L, AR), AR.SE, |
7063 |
if (!ReduceLoopStrength(&L, AM.getResult(L, AR), AR.SE, |
| 7064 |
AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA)) |
7064 |
AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA)) |
| 7065 |
return PreservedAnalyses::all(); |
7065 |
return PreservedAnalyses::all(); |
| 7066 |
|
7066 |
|
| 7067 |
auto PA = getLoopPassPreservedAnalyses(); |
7067 |
auto PA = getLoopPassPreservedAnalyses(); |
| 7068 |
if (AR.MSSA) |
7068 |
if (AR.MSSA) |
| 7069 |
PA.preserve(); |
7069 |
PA.preserve(); |
| 7070 |
return PA; |
7070 |
return PA; |
| 7071 |
} |
7071 |
} |
| 7072 |
|
7072 |
|
| 7073 |
char LoopStrengthReduce::ID = 0; |
7073 |
char LoopStrengthReduce::ID = 0; |
| 7074 |
|
7074 |
|
| 7075 |
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", |
7075 |
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", |
| 7076 |
"Loop Strength Reduction", false, false) |
7076 |
"Loop Strength Reduction", false, false) |
| 7077 |
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
7077 |
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
| 7078 |
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
7078 |
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
| 7079 |
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) |
7079 |
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) |
| 7080 |
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass) |
7080 |
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass) |
| 7081 |
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
7081 |
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
| 7082 |
INITIALIZE_PASS_DEPENDENCY(LoopSimplify) |
7082 |
INITIALIZE_PASS_DEPENDENCY(LoopSimplify) |
| 7083 |
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", |
7083 |
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", |
| 7084 |
"Loop Strength Reduction", false, false) |
7084 |
"Loop Strength Reduction", false, false) |
| 7085 |
|
7085 |
|
| 7086 |
Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); } |
7086 |
Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); } |
| 7087 |
|
7087 |
|